[llvm] [AArch64][ISel] Use vector register for scalar CLMUL (PR #183282)
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 04:25:57 PST 2026
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/183282
>From ef1a1449ee4b141b0b104e24c65e5a994b0b07d3 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 25 Feb 2026 10:53:03 +0000
Subject: [PATCH] [AArch64][ISel] Use vector register for scalar CLMUL
Even though there are only v8i8 and v1i64 variants for pmul/pmull, using them
is faster than the current implementation for scalar CLMUL.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 41 +-
llvm/test/CodeGen/AArch64/clmul-fixed.ll | 6139 +++++++++--------
llvm/test/CodeGen/AArch64/clmul.ll | 2394 +++----
3 files changed, 4367 insertions(+), 4207 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2cd78493d2c23..0201d45010195 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1513,9 +1513,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_FMLA, MVT::v4f32,
MVT::v8bf16, Legal);
+ setOperationAction(ISD::CLMUL, MVT::i8, Custom);
setOperationAction(ISD::CLMUL, {MVT::v8i8, MVT::v16i8}, Legal);
- if (Subtarget->hasAES())
+ if (Subtarget->hasAES()) {
+ setOperationAction(ISD::CLMUL, {MVT::i16, MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::CLMUL, {MVT::v1i64, MVT::v2i64}, Legal);
+ }
} else /* !isNeonAvailable */ {
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -8072,6 +8075,35 @@ SDValue AArch64TargetLowering::LowerFMA(SDValue Op, SelectionDAG &DAG) const {
return convertFromScalableVector(DAG, VT, ScalableRes);
}
+static SDValue LowerCLMUL(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert(
+ (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) &&
+ "Unexpected Type");
+ SDLoc DL(Op);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), VT, 64 / VT.getSizeInBits());
+ EVT CLMULTy = VT == MVT::i8 ? MVT::v8i8 : MVT::v1i64;
+ EVT ExtractTy = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+ SDValue VecOp0 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op.getOperand(0));
+ SDValue VecOp1 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op.getOperand(1));
+
+ if (VecVT != CLMULTy) {
+ VecOp0 = DAG.getNode(ISD::BITCAST, DL, CLMULTy, VecOp0);
+ VecOp1 = DAG.getNode(ISD::BITCAST, DL, CLMULTy, VecOp1);
+ }
+ SDValue CLMUL = DAG.getNode(ISD::CLMUL, DL, CLMULTy, VecOp0, VecOp1);
+ if (ExtractTy == MVT::i32)
+ CLMUL = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, CLMUL);
+ SDValue ExtractVecElt =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy, CLMUL,
+ DAG.getTargetConstant(0, DL, MVT::i64));
+ if (ExtractTy != VT)
+ ExtractVecElt = DAG.getNode(ISD::TRUNCATE, DL, VT, ExtractVecElt);
+ return ExtractVecElt;
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -8435,6 +8467,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::PARTIAL_REDUCE_SUMLA:
case ISD::PARTIAL_REDUCE_FMLA:
return LowerPARTIAL_REDUCE_MLA(Op, DAG);
+ case ISD::CLMUL:
+ return LowerCLMUL(Op, DAG);
}
}
@@ -30260,7 +30294,10 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::FADD:
ReplaceAddWithADDP(N, Results, DAG, Subtarget);
return;
-
+ case ISD::CLMUL:
+ if (SDValue Result = LowerCLMUL(SDValue(N, 0), DAG))
+ Results.push_back(Result);
+ return;
case ISD::CTPOP:
case ISD::PARITY:
if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
diff --git a/llvm/test/CodeGen/AArch64/clmul-fixed.ll b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
index 23692dc456fc2..61081c3342889 100644
--- a/llvm/test/CodeGen/AArch64/clmul-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
@@ -444,1285 +444,1308 @@ define <2 x i32> @clmul_v2i32_neon(<2 x i32> %x, <2 x i32> %y) {
; }
define <1 x i128> @clmul_v1i128_neon(<1 x i128> %x, <1 x i128> %y) {
-; CHECK-LABEL: clmul_v1i128_neon:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: sub sp, sp, #1952
-; CHECK-NEXT: .cfi_def_cfa_offset 2048
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w26, -64
-; CHECK-NEXT: .cfi_offset w27, -72
-; CHECK-NEXT: .cfi_offset w28, -80
-; CHECK-NEXT: .cfi_offset w30, -88
-; CHECK-NEXT: .cfi_offset w29, -96
-; CHECK-NEXT: rbit x8, x2
-; CHECK-NEXT: rbit x9, x0
-; CHECK-NEXT: and x7, x2, #0x2
-; CHECK-NEXT: and x18, x2, #0x1
-; CHECK-NEXT: and x4, x2, #0x4
-; CHECK-NEXT: and x5, x2, #0x10
-; CHECK-NEXT: and x10, x8, #0x2
-; CHECK-NEXT: and x6, x2, #0x80
-; CHECK-NEXT: and x17, x2, #0x800
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1944] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1936] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1928] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1920] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1912] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1904] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1888] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1896] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1880] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1864] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1872] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1856] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1832] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1840] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1824] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1848] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1816] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1792] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1784] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1808] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1776] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1800] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1768] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1728] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1760] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1720] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1752] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1736] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1744] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1696] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1656] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1688] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1672] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1680] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1664] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1704] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1712] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1640] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1608] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1584] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1632] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1600] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1616] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1592] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1624] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1648] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1568] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1520] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1536] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1512] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1560] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1504] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1552] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1528] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1576] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1544] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1480] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1448] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1472] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1456] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000000000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #1488] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000000000000000
-; CHECK-NEXT: and x8, x8, #0x4000000000000000
-; CHECK-NEXT: mul x8, x9, x8
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x8, [sp, #1496] // 8-byte Spill
-; CHECK-NEXT: mul x8, x1, x7
-; CHECK-NEXT: str x10, [sp, #1464] // 8-byte Spill
-; CHECK-NEXT: str x8, [sp, #1032] // 8-byte Spill
-; CHECK-NEXT: mul x8, x1, x18
-; CHECK-NEXT: mul x18, x0, x18
-; CHECK-NEXT: str x8, [sp, #1008] // 8-byte Spill
-; CHECK-NEXT: mul x8, x1, x4
-; CHECK-NEXT: str x18, [sp, #1128] // 8-byte Spill
-; CHECK-NEXT: mul x18, x0, x4
-; CHECK-NEXT: str x8, [sp, #992] // 8-byte Spill
-; CHECK-NEXT: and x8, x2, #0x8
-; CHECK-NEXT: mul x9, x1, x8
-; CHECK-NEXT: str x18, [sp, #1120] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x9, [sp, #976] // 8-byte Spill
-; CHECK-NEXT: mul x9, x1, x5
-; CHECK-NEXT: str x8, [sp, #1112] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x5
-; CHECK-NEXT: str x9, [sp, #952] // 8-byte Spill
-; CHECK-NEXT: and x9, x2, #0x20
-; CHECK-NEXT: mul x10, x1, x9
-; CHECK-NEXT: str x8, [sp, #1096] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x9
-; CHECK-NEXT: ldr x9, [sp, #1008] // 8-byte Reload
-; CHECK-NEXT: str x10, [sp, #928] // 8-byte Spill
-; CHECK-NEXT: and x10, x2, #0x40
-; CHECK-NEXT: mul x11, x1, x10
-; CHECK-NEXT: str x8, [sp, #1072] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x10
-; CHECK-NEXT: ldr x10, [sp, #976] // 8-byte Reload
-; CHECK-NEXT: str x11, [sp, #944] // 8-byte Spill
-; CHECK-NEXT: mul x11, x1, x6
-; CHECK-NEXT: str x8, [sp, #1104] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x6
-; CHECK-NEXT: str x11, [sp, #936] // 8-byte Spill
-; CHECK-NEXT: and x11, x2, #0x100
-; CHECK-NEXT: mul x12, x1, x11
-; CHECK-NEXT: str x8, [sp, #1064] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x11
-; CHECK-NEXT: ldr x11, [sp, #928] // 8-byte Reload
-; CHECK-NEXT: str x12, [sp, #920] // 8-byte Spill
-; CHECK-NEXT: and x12, x2, #0x200
-; CHECK-NEXT: mul x13, x1, x12
-; CHECK-NEXT: str x8, [sp, #1048] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x12
-; CHECK-NEXT: str x13, [sp, #968] // 8-byte Spill
-; CHECK-NEXT: and x13, x2, #0x400
-; CHECK-NEXT: mul x14, x1, x13
-; CHECK-NEXT: str x8, [sp, #1040] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x13
-; CHECK-NEXT: str x14, [sp, #960] // 8-byte Spill
-; CHECK-NEXT: mul x14, x1, x17
-; CHECK-NEXT: str x8, [sp, #1088] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x17
-; CHECK-NEXT: str x14, [sp, #848] // 8-byte Spill
-; CHECK-NEXT: and x14, x2, #0x1000
-; CHECK-NEXT: mul x15, x1, x14
-; CHECK-NEXT: str x8, [sp, #1000] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x14
-; CHECK-NEXT: str x15, [sp, #824] // 8-byte Spill
-; CHECK-NEXT: and x15, x2, #0x2000
-; CHECK-NEXT: mul x16, x1, x15
-; CHECK-NEXT: ldr x12, [sp, #824] // 8-byte Reload
-; CHECK-NEXT: str x8, [sp, #984] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x15
-; CHECK-NEXT: str x16, [sp, #896] // 8-byte Spill
-; CHECK-NEXT: and x16, x2, #0x4000
-; CHECK-NEXT: mul x19, x1, x16
-; CHECK-NEXT: str x8, [sp, #1024] // 8-byte Spill
-; CHECK-NEXT: mul x8, x0, x16
-; CHECK-NEXT: str x19, [sp, #888] // 8-byte Spill
-; CHECK-NEXT: and x19, x2, #0x8000
-; CHECK-NEXT: mul x20, x1, x19
-; CHECK-NEXT: str x8, [sp, #1016] // 8-byte Spill
-; CHECK-NEXT: ldr x8, [sp, #1032] // 8-byte Reload
-; CHECK-NEXT: eor x8, x9, x8
-; CHECK-NEXT: ldr x9, [sp, #992] // 8-byte Reload
-; CHECK-NEXT: str x20, [sp, #904] // 8-byte Spill
-; CHECK-NEXT: and x20, x2, #0x10000
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: mul x21, x1, x20
-; CHECK-NEXT: ldr x10, [sp, #952] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #944] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: mul x11, x0, x19
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: ldr x10, [sp, #936] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #968] // 8-byte Reload
-; CHECK-NEXT: str x21, [sp, #880] // 8-byte Spill
-; CHECK-NEXT: and x21, x2, #0x20000
-; CHECK-NEXT: mul x22, x1, x21
-; CHECK-NEXT: str x11, [sp, #1032] // 8-byte Spill
-; CHECK-NEXT: ldr x11, [sp, #920] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #848] // 8-byte Reload
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: ldr x10, [sp, #896] // 8-byte Reload
-; CHECK-NEXT: str x22, [sp, #840] // 8-byte Spill
-; CHECK-NEXT: and x22, x2, #0x40000
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x23, x1, x22
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: ldr x11, [sp, #960] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: ldr x11, [sp, #888] // 8-byte Reload
-; CHECK-NEXT: mul x12, x0, x20
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #904] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: mul x11, x0, x21
-; CHECK-NEXT: str x23, [sp, #832] // 8-byte Spill
-; CHECK-NEXT: and x23, x2, #0x80000
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: mul x24, x1, x23
-; CHECK-NEXT: ldr x10, [sp, #880] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: str x12, [sp, #1008] // 8-byte Spill
-; CHECK-NEXT: str x11, [sp, #992] // 8-byte Spill
-; CHECK-NEXT: ldr x11, [sp, #840] // 8-byte Reload
-; CHECK-NEXT: str x24, [sp, #872] // 8-byte Spill
-; CHECK-NEXT: and x24, x2, #0x100000
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: mul x25, x1, x24
-; CHECK-NEXT: ldr x11, [sp, #832] // 8-byte Reload
-; CHECK-NEXT: ldr x9, [sp, #872] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: str x25, [sp, #864] // 8-byte Spill
-; CHECK-NEXT: and x25, x2, #0x200000
-; CHECK-NEXT: mul x26, x1, x25
-; CHECK-NEXT: str x26, [sp, #912] // 8-byte Spill
-; CHECK-NEXT: and x26, x2, #0x400000
-; CHECK-NEXT: mul x27, x1, x26
-; CHECK-NEXT: str x27, [sp, #760] // 8-byte Spill
-; CHECK-NEXT: and x27, x2, #0x800000
-; CHECK-NEXT: mul x28, x1, x27
-; CHECK-NEXT: ldr x11, [sp, #760] // 8-byte Reload
-; CHECK-NEXT: str x28, [sp, #736] // 8-byte Spill
-; CHECK-NEXT: and x28, x2, #0x1000000
-; CHECK-NEXT: mul x29, x1, x28
-; CHECK-NEXT: ldr x12, [sp, #736] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x12, x0, x22
-; CHECK-NEXT: str x29, [sp, #784] // 8-byte Spill
-; CHECK-NEXT: and x29, x2, #0x2000000
-; CHECK-NEXT: mul x30, x1, x29
-; CHECK-NEXT: ldr x10, [sp, #784] // 8-byte Reload
-; CHECK-NEXT: str x12, [sp, #976] // 8-byte Spill
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: ldr x11, [sp, #864] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: str x30, [sp, #776] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: ldr x11, [sp, #776] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: mul x11, x0, x23
-; CHECK-NEXT: str x30, [sp, #800] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000
-; CHECK-NEXT: str x30, [sp, #1144] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x8000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x11, [sp, #968] // 8-byte Spill
-; CHECK-NEXT: ldr x11, [sp, #912] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: ldr x11, [sp, #800] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: str x30, [sp, #792] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x8000000
-; CHECK-NEXT: str x30, [sp, #1152] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x10000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #816] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x10000000
-; CHECK-NEXT: str x30, [sp, #1160] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x20000000
-; CHECK-NEXT: ldr x9, [sp, #816] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #728] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x20000000
-; CHECK-NEXT: str x30, [sp, #1168] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x40000000
-; CHECK-NEXT: ldr x11, [sp, #728] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #696] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x40000000
-; CHECK-NEXT: str x30, [sp, #1176] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x80000000
-; CHECK-NEXT: ldr x12, [sp, #696] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #792] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: str x30, [sp, #688] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x80000000
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: str x30, [sp, #1184] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x100000000
-; CHECK-NEXT: ldr x12, [sp, #688] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x12, x0, x24
-; CHECK-NEXT: str x30, [sp, #744] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x100000000
-; CHECK-NEXT: str x30, [sp, #1192] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x200000000
-; CHECK-NEXT: ldr x10, [sp, #744] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x12, [sp, #960] // 8-byte Spill
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: str x30, [sp, #720] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x200000000
-; CHECK-NEXT: str x30, [sp, #1200] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x400000000
-; CHECK-NEXT: ldr x11, [sp, #720] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: str x30, [sp, #768] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x400000000
-; CHECK-NEXT: str x30, [sp, #1208] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x800000000
-; CHECK-NEXT: ldr x9, [sp, #768] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: str x30, [sp, #808] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x800000000
-; CHECK-NEXT: str x30, [sp, #1216] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x1000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #856] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x1000000000
-; CHECK-NEXT: str x30, [sp, #1224] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x2000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #648] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x2000000000
-; CHECK-NEXT: str x30, [sp, #1232] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000000
-; CHECK-NEXT: ldr x11, [sp, #648] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #632] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000000
-; CHECK-NEXT: str x30, [sp, #1240] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x8000000000
-; CHECK-NEXT: ldr x12, [sp, #632] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x12, x0, x25
-; CHECK-NEXT: str x30, [sp, #664] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x8000000000
-; CHECK-NEXT: str x30, [sp, #1248] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x10000000000
-; CHECK-NEXT: ldr x10, [sp, #664] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x12, [sp, #952] // 8-byte Spill
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: str x30, [sp, #640] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x10000000000
-; CHECK-NEXT: str x30, [sp, #1256] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x20000000000
-; CHECK-NEXT: ldr x11, [sp, #640] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: str x30, [sp, #680] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x20000000000
-; CHECK-NEXT: str x30, [sp, #1264] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x40000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #672] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x40000000000
-; CHECK-NEXT: str x30, [sp, #1272] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x80000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #712] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x80000000000
-; CHECK-NEXT: str x30, [sp, #1280] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x100000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #704] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x100000000000
-; CHECK-NEXT: str x30, [sp, #1288] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x200000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #752] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x200000000000
-; CHECK-NEXT: str x30, [sp, #1296] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x400000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #520] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x400000000000
-; CHECK-NEXT: str x30, [sp, #1304] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x800000000000
-; CHECK-NEXT: ldr x11, [sp, #520] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #504] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x800000000000
-; CHECK-NEXT: str x30, [sp, #1312] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x1000000000000
-; CHECK-NEXT: ldr x12, [sp, #504] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x12, x0, x26
-; CHECK-NEXT: str x30, [sp, #560] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x1000000000000
-; CHECK-NEXT: str x30, [sp, #1320] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x2000000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x12, [sp, #944] // 8-byte Spill
-; CHECK-NEXT: ldr x12, [sp, #808] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #680] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #560] // 8-byte Reload
-; CHECK-NEXT: str x30, [sp, #552] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x2000000000000
-; CHECK-NEXT: str x30, [sp, #1328] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000000000
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: ldr x12, [sp, #672] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #552] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x12, x0, x27
-; CHECK-NEXT: str x30, [sp, #584] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000000000
-; CHECK-NEXT: str x30, [sp, #1336] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x8000000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x12, [sp, #936] // 8-byte Spill
-; CHECK-NEXT: ldr x12, [sp, #856] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #712] // 8-byte Reload
-; CHECK-NEXT: str x30, [sp, #576] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x8000000000000
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: str x30, [sp, #1344] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x10000000000000
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: ldr x12, [sp, #584] // 8-byte Reload
-; CHECK-NEXT: ldr x9, [sp, #752] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #704] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #576] // 8-byte Reload
-; CHECK-NEXT: str x30, [sp, #608] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x10000000000000
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: str x30, [sp, #1352] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x20000000000000
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: ldr x10, [sp, #608] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: mul x12, x0, x28
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: str x30, [sp, #592] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x20000000000000
-; CHECK-NEXT: str x30, [sp, #1360] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x40000000000000
-; CHECK-NEXT: ldr x11, [sp, #592] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x12, [sp, #928] // 8-byte Spill
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: str x30, [sp, #624] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x40000000000000
-; CHECK-NEXT: str x30, [sp, #1368] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x80000000000000
-; CHECK-NEXT: ldr x9, [sp, #624] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: str x30, [sp, #616] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x80000000000000
-; CHECK-NEXT: str x30, [sp, #1376] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x100000000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #528] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x100000000000000
-; CHECK-NEXT: str x30, [sp, #1384] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x200000000000000
-; CHECK-NEXT: ldr x11, [sp, #528] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #512] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x200000000000000
-; CHECK-NEXT: str x30, [sp, #1392] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x400000000000000
-; CHECK-NEXT: ldr x12, [sp, #512] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: mul x12, x0, x29
-; CHECK-NEXT: str x30, [sp, #544] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x400000000000000
-; CHECK-NEXT: str x30, [sp, #1400] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x800000000000000
-; CHECK-NEXT: ldr x10, [sp, #544] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x12, [sp, #920] // 8-byte Spill
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: ldr x11, [sp, #616] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: eor x21, x8, x9
-; CHECK-NEXT: str x30, [sp, #536] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x800000000000000
-; CHECK-NEXT: str x30, [sp, #1408] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x1000000000000000
-; CHECK-NEXT: ldr x11, [sp, #536] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #1144] // 8-byte Reload
-; CHECK-NEXT: mul x11, x0, x11
-; CHECK-NEXT: str x30, [sp, #568] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x1000000000000000
-; CHECK-NEXT: str x30, [sp, #1416] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x2000000000000000
-; CHECK-NEXT: ldr x8, [sp, #568] // 8-byte Reload
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: eor x8, x10, x8
-; CHECK-NEXT: str x11, [sp, #1144] // 8-byte Spill
-; CHECK-NEXT: str x30, [sp, #600] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x2000000000000000
-; CHECK-NEXT: str x30, [sp, #1424] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000000000000
-; CHECK-NEXT: mul x30, x1, x30
-; CHECK-NEXT: str x30, [sp, #656] // 8-byte Spill
-; CHECK-NEXT: and x30, x2, #0x4000000000000000
-; CHECK-NEXT: and x2, x2, #0x8000000000000000
-; CHECK-NEXT: str x30, [sp, #1432] // 8-byte Spill
-; CHECK-NEXT: mul x30, x1, x2
-; CHECK-NEXT: and x1, x3, #0x2
-; CHECK-NEXT: str x2, [sp, #1440] // 8-byte Spill
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x1
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: stp x1, x2, [sp, #488] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x4
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x8
-; CHECK-NEXT: ldp x10, x9, [sp, #488] // 16-byte Folded Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: str x1, [sp, #456] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x10
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x11, [sp, #456] // 8-byte Reload
-; CHECK-NEXT: stp x2, x1, [sp, #472] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x20
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x10, [sp, #472] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #1152] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #480] // 8-byte Reload
-; CHECK-NEXT: mul x11, x0, x11
-; CHECK-NEXT: str x1, [sp, #464] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x40
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x80
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x11, [sp, #1152] // 8-byte Spill
-; CHECK-NEXT: ldr x11, [sp, #600] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: ldr x11, [sp, #464] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #424] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x100
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x1, [sp, #384] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x200
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x12, [sp, #384] // 8-byte Reload
-; CHECK-NEXT: stp x1, x2, [sp, #440] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x400
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x800
-; CHECK-NEXT: ldr x11, [sp, #448] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #424] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #440] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1160] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: mul x12, x0, x12
-; CHECK-NEXT: str x1, [sp, #392] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x1000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x12, [sp, #1160] // 8-byte Spill
-; CHECK-NEXT: ldr x12, [sp, #656] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #376] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x2000
-; CHECK-NEXT: eor x8, x8, x12
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x12, [sp, #376] // 8-byte Reload
-; CHECK-NEXT: eor x16, x8, x30
-; CHECK-NEXT: stp x1, x2, [sp, #408] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x4000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x11, [sp, #416] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #392] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #408] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1168] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #400] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x8000
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: mul x12, x0, x12
-; CHECK-NEXT: str x1, [sp, #432] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x10000
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x20000
-; CHECK-NEXT: str x12, [sp, #1168] // 8-byte Spill
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x1, [sp, #328] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x40000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x12, [sp, #328] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #320] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x80000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: stp x1, x2, [sp, #352] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x100000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x11, [sp, #360] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #400] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #320] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #344] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x200000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1176] // 8-byte Reload
-; CHECK-NEXT: mul x12, x0, x12
-; CHECK-NEXT: str x1, [sp, #368] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x400000
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x800000
-; CHECK-NEXT: str x12, [sp, #1176] // 8-byte Spill
-; CHECK-NEXT: ldr x12, [sp, #432] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #352] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #368] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: str x1, [sp, #232] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x1000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x13, [sp, #232] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #224] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x2000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: stp x2, x1, [sp, #280] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x4000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x12, [sp, #280] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #344] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x13
-; CHECK-NEXT: ldr x13, [sp, #224] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #272] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x8000000
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x10000000
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #1184] // 8-byte Reload
-; CHECK-NEXT: ldr x11, [sp, #288] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: ldr x12, [sp, #272] // 8-byte Reload
-; CHECK-NEXT: mul x13, x0, x13
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: stp x1, x2, [sp, #304] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x20000000
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x40000000
-; CHECK-NEXT: ldr x10, [sp, #312] // 8-byte Reload
-; CHECK-NEXT: str x13, [sp, #1184] // 8-byte Spill
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: stp x1, x2, [sp, #168] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x80000000
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x100000000
-; CHECK-NEXT: ldp x13, x12, [sp, #168] // 16-byte Folded Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #1192] // 8-byte Reload
-; CHECK-NEXT: mul x13, x0, x13
-; CHECK-NEXT: stp x1, x2, [sp, #200] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x200000000
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x400000000
-; CHECK-NEXT: ldr x11, [sp, #208] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: ldr x12, [sp, #304] // 8-byte Reload
-; CHECK-NEXT: str x13, [sp, #1192] // 8-byte Spill
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #200] // 8-byte Reload
-; CHECK-NEXT: eor x8, x9, x10
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1200] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #216] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x800000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x10, [sp, #216] // 8-byte Reload
-; CHECK-NEXT: mul x12, x0, x12
-; CHECK-NEXT: stp x2, x1, [sp, #256] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x1000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x9, [sp, #256] // 8-byte Reload
-; CHECK-NEXT: str x12, [sp, #1200] // 8-byte Spill
-; CHECK-NEXT: eor x9, x11, x9
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: str x1, [sp, #336] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x2000000000
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x4000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x1, [sp, #136] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x8000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x11, [sp, #136] // 8-byte Reload
-; CHECK-NEXT: stp x1, x2, [sp, #152] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x10000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x10, [sp, #160] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #1208] // 8-byte Reload
-; CHECK-NEXT: mul x11, x0, x11
-; CHECK-NEXT: str x1, [sp, #144] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x20000000000
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x40000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x11, [sp, #1208] // 8-byte Spill
-; CHECK-NEXT: ldr x11, [sp, #264] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: ldr x11, [sp, #152] // 8-byte Reload
-; CHECK-NEXT: stp x1, x2, [sp, #184] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x80000000000
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: mul x2, x0, x1
-; CHECK-NEXT: and x1, x3, #0x100000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: stp x1, x2, [sp, #240] // 16-byte Folded Spill
-; CHECK-NEXT: and x1, x3, #0x200000000000
-; CHECK-NEXT: and x2, x3, #0x1000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: mul x2, x0, x2
-; CHECK-NEXT: str x1, [sp, #296] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x400000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x1, [sp, #40] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x800000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x11, [sp, #40] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x2000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x12, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #144] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x2
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #1216] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #56] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x4000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: mul x23, x0, x12
-; CHECK-NEXT: ldr x12, [sp, #336] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #192] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #48] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x8000000000000
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #56] // 8-byte Reload
-; CHECK-NEXT: ldr x9, [sp, #248] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #184] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #48] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #96] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x10000000000000
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x10, [sp, #96] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1224] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: ldr x11, [sp, #240] // 8-byte Reload
-; CHECK-NEXT: mul x24, x0, x12
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: str x1, [sp, #88] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x20000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x11, [sp, #88] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #1232] // 8-byte Reload
-; CHECK-NEXT: mul x25, x0, x11
-; CHECK-NEXT: ldr x11, [sp, #296] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #112] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x40000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: ldr x11, [sp, #112] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: str x1, [sp, #104] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x80000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: str x1, [sp, #120] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x100000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x9, [sp, #120] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #80] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x200000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x11, [sp, #80] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #32] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x400000000000000
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x12, [sp, #32] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #104] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: str x1, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x800000000000000
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: ldr x12, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: eor x15, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #1248] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1240] // 8-byte Reload
-; CHECK-NEXT: mul x27, x0, x9
-; CHECK-NEXT: ldr x9, [sp, #1944] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #72] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x1000000000000000
-; CHECK-NEXT: mul x26, x0, x12
-; CHECK-NEXT: ldr x10, [sp, #72] // 8-byte Reload
-; CHECK-NEXT: ldr x12, [sp, #1904] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: ldr x11, [sp, #1920] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #64] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x2000000000000000
-; CHECK-NEXT: ldr x8, [sp, #64] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x8, x10, x8
-; CHECK-NEXT: ldr x10, [sp, #1936] // 8-byte Reload
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: ldr x10, [sp, #1928] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #128] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x4000000000000000
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #1912] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1888] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1792] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: ldr x11, [sp, #1256] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1880] // 8-byte Reload
-; CHECK-NEXT: mul x28, x0, x11
-; CHECK-NEXT: ldr x11, [sp, #128] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #1056] // 8-byte Spill
-; CHECK-NEXT: and x1, x3, #0x8000000000000000
-; CHECK-NEXT: eor x14, x8, x11
-; CHECK-NEXT: ldr x8, [sp, #1896] // 8-byte Reload
-; CHECK-NEXT: ldr x11, [sp, #1832] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x1
-; CHECK-NEXT: eor x10, x8, x10
-; CHECK-NEXT: ldr x8, [sp, #1864] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x8
-; CHECK-NEXT: ldr x8, [sp, #1856] // 8-byte Reload
-; CHECK-NEXT: eor x11, x8, x11
-; CHECK-NEXT: ldr x8, [sp, #1264] // 8-byte Reload
-; CHECK-NEXT: str x1, [sp, #1080] // 8-byte Spill
-; CHECK-NEXT: mul x1, x0, x7
-; CHECK-NEXT: mul x29, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1872] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x8
-; CHECK-NEXT: ldr x8, [sp, #1840] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: str x1, [sp, #1136] // 8-byte Spill
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1816] // 8-byte Reload
-; CHECK-NEXT: eor x12, x8, x12
-; CHECK-NEXT: ldr x8, [sp, #1824] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1784] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1272] // 8-byte Reload
-; CHECK-NEXT: mul x30, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1848] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1808] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: eor x11, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1776] // 8-byte Reload
-; CHECK-NEXT: ldr x12, [sp, #1728] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1768] // 8-byte Reload
-; CHECK-NEXT: eor x12, x8, x12
-; CHECK-NEXT: ldr x8, [sp, #1280] // 8-byte Reload
-; CHECK-NEXT: mul x22, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1800] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1760] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: eor x11, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1720] // 8-byte Reload
-; CHECK-NEXT: ldr x12, [sp, #1656] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1696] // 8-byte Reload
-; CHECK-NEXT: eor x12, x8, x12
-; CHECK-NEXT: ldr x8, [sp, #1288] // 8-byte Reload
-; CHECK-NEXT: mul x20, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1752] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1688] // 8-byte Reload
-; CHECK-NEXT: eor x11, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1736] // 8-byte Reload
-; CHECK-NEXT: ldr x12, [sp, #1608] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x8
-; CHECK-NEXT: ldr x8, [sp, #1672] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1296] // 8-byte Reload
-; CHECK-NEXT: mul x19, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1744] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x8
-; CHECK-NEXT: ldr x8, [sp, #1680] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1640] // 8-byte Reload
-; CHECK-NEXT: eor x12, x8, x12
-; CHECK-NEXT: ldr x8, [sp, #1664] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1584] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1304] // 8-byte Reload
-; CHECK-NEXT: mul x7, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1704] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1632] // 8-byte Reload
-; CHECK-NEXT: eor x11, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1600] // 8-byte Reload
-; CHECK-NEXT: ldr x12, [sp, #1520] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1568] // 8-byte Reload
-; CHECK-NEXT: eor x12, x8, x12
-; CHECK-NEXT: ldr x8, [sp, #1312] // 8-byte Reload
-; CHECK-NEXT: mul x6, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1712] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x8
-; CHECK-NEXT: ldr x8, [sp, #1616] // 8-byte Reload
-; CHECK-NEXT: eor x13, x9, x10
-; CHECK-NEXT: ldr x9, [sp, #1448] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1536] // 8-byte Reload
-; CHECK-NEXT: eor x6, x7, x6
-; CHECK-NEXT: ldr x7, [sp, #1424] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1592] // 8-byte Reload
-; CHECK-NEXT: mul x7, x0, x7
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1512] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1320] // 8-byte Reload
-; CHECK-NEXT: mul x5, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #1624] // 8-byte Reload
-; CHECK-NEXT: eor x10, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1560] // 8-byte Reload
-; CHECK-NEXT: eor x11, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #1504] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldr x8, [sp, #1480] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #1328] // 8-byte Reload
-; CHECK-NEXT: mul x4, x0, x9
-; CHECK-NEXT: ldr x9, [sp, #1648] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x9
-; CHECK-NEXT: ldr x9, [sp, #1552] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x10
-; CHECK-NEXT: ldr x10, [sp, #1576] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x9
-; CHECK-NEXT: ldr x9, [sp, #1472] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #1528] // 8-byte Reload
-; CHECK-NEXT: eor x9, x11, x9
-; CHECK-NEXT: ldr x11, [sp, #1456] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1488] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: ldr x11, [sp, #1336] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldr x10, [sp, #1544] // 8-byte Reload
-; CHECK-NEXT: mul x3, x0, x11
-; CHECK-NEXT: ldr x11, [sp, #992] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1464] // 8-byte Reload
-; CHECK-NEXT: eor x9, x13, x9
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldr x10, [sp, #1344] // 8-byte Reload
-; CHECK-NEXT: mul x1, x0, x10
-; CHECK-NEXT: ldr x10, [sp, #1056] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x10
-; CHECK-NEXT: ldr x10, [sp, #1496] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldr x10, [sp, #1080] // 8-byte Reload
-; CHECK-NEXT: eor x8, x9, x8
-; CHECK-NEXT: ldr x9, [sp, #1352] // 8-byte Reload
-; CHECK-NEXT: eor x13, x14, x10
-; CHECK-NEXT: eor x14, x21, x16
-; CHECK-NEXT: rbit x8, x8
-; CHECK-NEXT: mul x18, x0, x9
-; CHECK-NEXT: eor x13, x15, x13
-; CHECK-NEXT: ldr x9, [sp, #1360] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x10, [sp, #1112] // 8-byte Reload
-; CHECK-NEXT: mul x17, x0, x9
-; CHECK-NEXT: eor x2, x13, x8, lsr #1
-; CHECK-NEXT: ldr x8, [sp, #1136] // 8-byte Reload
-; CHECK-NEXT: ldr x9, [sp, #1128] // 8-byte Reload
-; CHECK-NEXT: eor x8, x9, x8
-; CHECK-NEXT: ldr x9, [sp, #1120] // 8-byte Reload
-; CHECK-NEXT: eor x13, x9, x10
-; CHECK-NEXT: ldr x9, [sp, #1096] // 8-byte Reload
-; CHECK-NEXT: ldr x10, [sp, #1072] // 8-byte Reload
-; CHECK-NEXT: eor x13, x8, x13
-; CHECK-NEXT: ldr x8, [sp, #1104] // 8-byte Reload
-; CHECK-NEXT: eor x14, x9, x10
-; CHECK-NEXT: ldr x9, [sp, #1368] // 8-byte Reload
-; CHECK-NEXT: ldr x10, [sp, #984] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x8
-; CHECK-NEXT: ldr x8, [sp, #1064] // 8-byte Reload
-; CHECK-NEXT: mul x21, x0, x9
-; CHECK-NEXT: ldr x9, [sp, #1048] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #1040] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #1000] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1376] // 8-byte Reload
-; CHECK-NEXT: mul x16, x0, x10
-; CHECK-NEXT: ldr x10, [sp, #1088] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldr x10, [sp, #1024] // 8-byte Reload
-; CHECK-NEXT: eor x8, x13, x8
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1016] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1008] // 8-byte Reload
-; CHECK-NEXT: eor x14, x10, x11
-; CHECK-NEXT: ldr x10, [sp, #1384] // 8-byte Reload
-; CHECK-NEXT: ldr x11, [sp, #936] // 8-byte Reload
-; CHECK-NEXT: mul x15, x0, x10
-; CHECK-NEXT: ldr x10, [sp, #1032] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #976] // 8-byte Reload
-; CHECK-NEXT: eor x13, x14, x10
-; CHECK-NEXT: eor x10, x8, x9
-; CHECK-NEXT: ldr x8, [sp, #968] // 8-byte Reload
-; CHECK-NEXT: eor x9, x13, x8
-; CHECK-NEXT: ldr x8, [sp, #1392] // 8-byte Reload
-; CHECK-NEXT: mul x13, x0, x8
-; CHECK-NEXT: ldr x8, [sp, #960] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x8
-; CHECK-NEXT: ldr x8, [sp, #944] // 8-byte Reload
-; CHECK-NEXT: eor x14, x8, x11
-; CHECK-NEXT: ldr x8, [sp, #1168] // 8-byte Reload
-; CHECK-NEXT: ldr x11, [sp, #1176] // 8-byte Reload
-; CHECK-NEXT: eor x13, x15, x13
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: ldr x11, [sp, #928] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x11
-; CHECK-NEXT: ldr x11, [sp, #1184] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: ldr x11, [sp, #1400] // 8-byte Reload
-; CHECK-NEXT: mul x12, x0, x11
-; CHECK-NEXT: ldr x11, [sp, #952] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: ldr x11, [sp, #920] // 8-byte Reload
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: ldr x10, [sp, #1152] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x11
-; CHECK-NEXT: ldr x11, [sp, #1192] // 8-byte Reload
-; CHECK-NEXT: eor x12, x13, x12
-; CHECK-NEXT: ldr x13, [sp, #1440] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: ldr x11, [sp, #1144] // 8-byte Reload
-; CHECK-NEXT: mul x13, x0, x13
-; CHECK-NEXT: eor x14, x14, x11
-; CHECK-NEXT: ldr x11, [sp, #1200] // 8-byte Reload
-; CHECK-NEXT: eor x10, x14, x10
-; CHECK-NEXT: ldr x14, [sp, #1208] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: ldr x11, [sp, #1408] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x14
-; CHECK-NEXT: ldr x14, [sp, #1160] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x23
-; CHECK-NEXT: mul x11, x0, x11
-; CHECK-NEXT: eor x10, x10, x14
-; CHECK-NEXT: ldr x14, [sp, #1416] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x24
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: eor x10, x25, x26
-; CHECK-NEXT: eor x10, x10, x27
-; CHECK-NEXT: eor x8, x9, x8
-; CHECK-NEXT: mul x14, x0, x14
-; CHECK-NEXT: eor x9, x10, x28
-; CHECK-NEXT: eor x10, x6, x5
-; CHECK-NEXT: eor x10, x10, x4
-; CHECK-NEXT: ldr x4, [sp, #1432] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x29
-; CHECK-NEXT: eor x10, x10, x3
-; CHECK-NEXT: eor x9, x9, x30
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: mul x4, x0, x4
-; CHECK-NEXT: eor x10, x10, x1
-; CHECK-NEXT: eor x9, x9, x22
-; CHECK-NEXT: eor x10, x10, x18
-; CHECK-NEXT: eor x9, x9, x20
-; CHECK-NEXT: eor x11, x11, x14
-; CHECK-NEXT: eor x10, x10, x17
-; CHECK-NEXT: eor x9, x9, x19
-; CHECK-NEXT: eor x11, x11, x7
-; CHECK-NEXT: eor x10, x10, x21
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: mov x1, x2
-; CHECK-NEXT: eor x9, x10, x16
-; CHECK-NEXT: eor x10, x11, x4
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x10, x13
-; CHECK-NEXT: eor x0, x8, x9
-; CHECK-NEXT: add sp, sp, #1952
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_v1i128_neon:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: sub sp, sp, #1952
+; CHECK-NEON-NEXT: .cfi_def_cfa_offset 2048
+; CHECK-NEON-NEXT: .cfi_offset w19, -8
+; CHECK-NEON-NEXT: .cfi_offset w20, -16
+; CHECK-NEON-NEXT: .cfi_offset w21, -24
+; CHECK-NEON-NEXT: .cfi_offset w22, -32
+; CHECK-NEON-NEXT: .cfi_offset w23, -40
+; CHECK-NEON-NEXT: .cfi_offset w24, -48
+; CHECK-NEON-NEXT: .cfi_offset w25, -56
+; CHECK-NEON-NEXT: .cfi_offset w26, -64
+; CHECK-NEON-NEXT: .cfi_offset w27, -72
+; CHECK-NEON-NEXT: .cfi_offset w28, -80
+; CHECK-NEON-NEXT: .cfi_offset w30, -88
+; CHECK-NEON-NEXT: .cfi_offset w29, -96
+; CHECK-NEON-NEXT: rbit x8, x2
+; CHECK-NEON-NEXT: rbit x9, x0
+; CHECK-NEON-NEXT: and x7, x2, #0x2
+; CHECK-NEON-NEXT: and x18, x2, #0x1
+; CHECK-NEON-NEXT: and x4, x2, #0x4
+; CHECK-NEON-NEXT: and x5, x2, #0x10
+; CHECK-NEON-NEXT: and x10, x8, #0x2
+; CHECK-NEON-NEXT: and x6, x2, #0x80
+; CHECK-NEON-NEXT: and x17, x2, #0x800
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1944] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1936] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1928] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1920] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1912] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1904] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1888] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1896] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1880] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1864] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1872] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1856] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1832] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1840] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1824] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1848] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1816] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1792] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1784] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1808] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1776] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1800] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1768] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1728] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1760] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1720] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1752] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1736] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1744] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1696] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1656] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1688] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1672] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1680] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1664] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1704] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1712] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1640] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1608] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1584] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1632] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1600] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1616] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1592] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1624] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1648] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1568] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1520] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1536] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1512] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1560] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1504] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1552] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1528] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1576] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1544] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1480] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1448] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1472] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1456] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000000000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1488] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000000000000000
+; CHECK-NEON-NEXT: and x8, x8, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x8, x9, x8
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x8, [sp, #1496] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x1, x7
+; CHECK-NEON-NEXT: str x10, [sp, #1464] // 8-byte Spill
+; CHECK-NEON-NEXT: str x8, [sp, #1032] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x1, x18
+; CHECK-NEON-NEXT: mul x18, x0, x18
+; CHECK-NEON-NEXT: str x8, [sp, #1008] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x1, x4
+; CHECK-NEON-NEXT: str x18, [sp, #1128] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x18, x0, x4
+; CHECK-NEON-NEXT: str x8, [sp, #992] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x2, #0x8
+; CHECK-NEON-NEXT: mul x9, x1, x8
+; CHECK-NEON-NEXT: str x18, [sp, #1120] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x9, [sp, #976] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x9, x1, x5
+; CHECK-NEON-NEXT: str x8, [sp, #1112] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x5
+; CHECK-NEON-NEXT: str x9, [sp, #952] // 8-byte Spill
+; CHECK-NEON-NEXT: and x9, x2, #0x20
+; CHECK-NEON-NEXT: mul x10, x1, x9
+; CHECK-NEON-NEXT: str x8, [sp, #1096] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1008] // 8-byte Reload
+; CHECK-NEON-NEXT: str x10, [sp, #928] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x2, #0x40
+; CHECK-NEON-NEXT: mul x11, x1, x10
+; CHECK-NEON-NEXT: str x8, [sp, #1072] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #976] // 8-byte Reload
+; CHECK-NEON-NEXT: str x11, [sp, #944] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x11, x1, x6
+; CHECK-NEON-NEXT: str x8, [sp, #1104] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x6
+; CHECK-NEON-NEXT: str x11, [sp, #936] // 8-byte Spill
+; CHECK-NEON-NEXT: and x11, x2, #0x100
+; CHECK-NEON-NEXT: mul x12, x1, x11
+; CHECK-NEON-NEXT: str x8, [sp, #1064] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #928] // 8-byte Reload
+; CHECK-NEON-NEXT: str x12, [sp, #920] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x2, #0x200
+; CHECK-NEON-NEXT: mul x13, x1, x12
+; CHECK-NEON-NEXT: str x8, [sp, #1048] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x12
+; CHECK-NEON-NEXT: str x13, [sp, #968] // 8-byte Spill
+; CHECK-NEON-NEXT: and x13, x2, #0x400
+; CHECK-NEON-NEXT: mul x14, x1, x13
+; CHECK-NEON-NEXT: str x8, [sp, #1040] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x13
+; CHECK-NEON-NEXT: str x14, [sp, #960] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x14, x1, x17
+; CHECK-NEON-NEXT: str x8, [sp, #1088] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x17
+; CHECK-NEON-NEXT: str x14, [sp, #848] // 8-byte Spill
+; CHECK-NEON-NEXT: and x14, x2, #0x1000
+; CHECK-NEON-NEXT: mul x15, x1, x14
+; CHECK-NEON-NEXT: str x8, [sp, #1000] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x14
+; CHECK-NEON-NEXT: str x15, [sp, #824] // 8-byte Spill
+; CHECK-NEON-NEXT: and x15, x2, #0x2000
+; CHECK-NEON-NEXT: mul x16, x1, x15
+; CHECK-NEON-NEXT: ldr x12, [sp, #824] // 8-byte Reload
+; CHECK-NEON-NEXT: str x8, [sp, #984] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x15
+; CHECK-NEON-NEXT: str x16, [sp, #896] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x2, #0x4000
+; CHECK-NEON-NEXT: mul x19, x1, x16
+; CHECK-NEON-NEXT: str x8, [sp, #1024] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x8, x0, x16
+; CHECK-NEON-NEXT: str x19, [sp, #888] // 8-byte Spill
+; CHECK-NEON-NEXT: and x19, x2, #0x8000
+; CHECK-NEON-NEXT: mul x20, x1, x19
+; CHECK-NEON-NEXT: str x8, [sp, #1016] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x8, [sp, #1032] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x9, x8
+; CHECK-NEON-NEXT: ldr x9, [sp, #992] // 8-byte Reload
+; CHECK-NEON-NEXT: str x20, [sp, #904] // 8-byte Spill
+; CHECK-NEON-NEXT: and x20, x2, #0x10000
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: mul x21, x1, x20
+; CHECK-NEON-NEXT: ldr x10, [sp, #952] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #944] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: mul x11, x0, x19
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: ldr x10, [sp, #936] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #968] // 8-byte Reload
+; CHECK-NEON-NEXT: str x21, [sp, #880] // 8-byte Spill
+; CHECK-NEON-NEXT: and x21, x2, #0x20000
+; CHECK-NEON-NEXT: mul x22, x1, x21
+; CHECK-NEON-NEXT: str x11, [sp, #1032] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x11, [sp, #920] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #848] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: ldr x10, [sp, #896] // 8-byte Reload
+; CHECK-NEON-NEXT: str x22, [sp, #840] // 8-byte Spill
+; CHECK-NEON-NEXT: and x22, x2, #0x40000
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x23, x1, x22
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: ldr x11, [sp, #960] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #888] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x12, x0, x20
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #904] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: mul x11, x0, x21
+; CHECK-NEON-NEXT: str x23, [sp, #832] // 8-byte Spill
+; CHECK-NEON-NEXT: and x23, x2, #0x80000
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: mul x24, x1, x23
+; CHECK-NEON-NEXT: ldr x10, [sp, #880] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: str x12, [sp, #1008] // 8-byte Spill
+; CHECK-NEON-NEXT: str x11, [sp, #992] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x11, [sp, #840] // 8-byte Reload
+; CHECK-NEON-NEXT: str x24, [sp, #872] // 8-byte Spill
+; CHECK-NEON-NEXT: and x24, x2, #0x100000
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: mul x25, x1, x24
+; CHECK-NEON-NEXT: ldr x11, [sp, #832] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x9, [sp, #872] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: str x25, [sp, #864] // 8-byte Spill
+; CHECK-NEON-NEXT: and x25, x2, #0x200000
+; CHECK-NEON-NEXT: mul x26, x1, x25
+; CHECK-NEON-NEXT: str x26, [sp, #912] // 8-byte Spill
+; CHECK-NEON-NEXT: and x26, x2, #0x400000
+; CHECK-NEON-NEXT: mul x27, x1, x26
+; CHECK-NEON-NEXT: str x27, [sp, #760] // 8-byte Spill
+; CHECK-NEON-NEXT: and x27, x2, #0x800000
+; CHECK-NEON-NEXT: mul x28, x1, x27
+; CHECK-NEON-NEXT: ldr x11, [sp, #760] // 8-byte Reload
+; CHECK-NEON-NEXT: str x28, [sp, #736] // 8-byte Spill
+; CHECK-NEON-NEXT: and x28, x2, #0x1000000
+; CHECK-NEON-NEXT: mul x29, x1, x28
+; CHECK-NEON-NEXT: ldr x12, [sp, #736] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x12, x0, x22
+; CHECK-NEON-NEXT: str x29, [sp, #784] // 8-byte Spill
+; CHECK-NEON-NEXT: and x29, x2, #0x2000000
+; CHECK-NEON-NEXT: mul x30, x1, x29
+; CHECK-NEON-NEXT: ldr x10, [sp, #784] // 8-byte Reload
+; CHECK-NEON-NEXT: str x12, [sp, #976] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: ldr x11, [sp, #864] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: str x30, [sp, #776] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: ldr x11, [sp, #776] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: mul x11, x0, x23
+; CHECK-NEON-NEXT: str x30, [sp, #800] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000
+; CHECK-NEON-NEXT: str x30, [sp, #1144] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x8000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x11, [sp, #968] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x11, [sp, #912] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #800] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: str x30, [sp, #792] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x8000000
+; CHECK-NEON-NEXT: str x30, [sp, #1152] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x10000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #816] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x10000000
+; CHECK-NEON-NEXT: str x30, [sp, #1160] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x20000000
+; CHECK-NEON-NEXT: ldr x9, [sp, #816] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #728] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x20000000
+; CHECK-NEON-NEXT: str x30, [sp, #1168] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x40000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #728] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #696] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x40000000
+; CHECK-NEON-NEXT: str x30, [sp, #1176] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x80000000
+; CHECK-NEON-NEXT: ldr x12, [sp, #696] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #792] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: str x30, [sp, #688] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x80000000
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: str x30, [sp, #1184] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x100000000
+; CHECK-NEON-NEXT: ldr x12, [sp, #688] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x12, x0, x24
+; CHECK-NEON-NEXT: str x30, [sp, #744] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x100000000
+; CHECK-NEON-NEXT: str x30, [sp, #1192] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x200000000
+; CHECK-NEON-NEXT: ldr x10, [sp, #744] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x12, [sp, #960] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: str x30, [sp, #720] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x200000000
+; CHECK-NEON-NEXT: str x30, [sp, #1200] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x400000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #720] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: str x30, [sp, #768] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x400000000
+; CHECK-NEON-NEXT: str x30, [sp, #1208] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x800000000
+; CHECK-NEON-NEXT: ldr x9, [sp, #768] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: str x30, [sp, #808] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x800000000
+; CHECK-NEON-NEXT: str x30, [sp, #1216] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x1000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #856] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x1000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1224] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x2000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #648] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x2000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1232] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #648] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #632] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1240] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x8000000000
+; CHECK-NEON-NEXT: ldr x12, [sp, #632] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x12, x0, x25
+; CHECK-NEON-NEXT: str x30, [sp, #664] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x8000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1248] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x10000000000
+; CHECK-NEON-NEXT: ldr x10, [sp, #664] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x12, [sp, #952] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: str x30, [sp, #640] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x10000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1256] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x20000000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #640] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: str x30, [sp, #680] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x20000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1264] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x40000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #672] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x40000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1272] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x80000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #712] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x80000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1280] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x100000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #704] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x100000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1288] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x200000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #752] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x200000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1296] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x400000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #520] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x400000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1304] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x800000000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #520] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #504] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x800000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1312] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x1000000000000
+; CHECK-NEON-NEXT: ldr x12, [sp, #504] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x12, x0, x26
+; CHECK-NEON-NEXT: str x30, [sp, #560] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x1000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1320] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x2000000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x12, [sp, #944] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x12, [sp, #808] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #680] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #560] // 8-byte Reload
+; CHECK-NEON-NEXT: str x30, [sp, #552] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x2000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1328] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000000000
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: ldr x12, [sp, #672] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #552] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x12, x0, x27
+; CHECK-NEON-NEXT: str x30, [sp, #584] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1336] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x8000000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x12, [sp, #936] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x12, [sp, #856] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #712] // 8-byte Reload
+; CHECK-NEON-NEXT: str x30, [sp, #576] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x8000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: str x30, [sp, #1344] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x10000000000000
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: ldr x12, [sp, #584] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x9, [sp, #752] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #704] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #576] // 8-byte Reload
+; CHECK-NEON-NEXT: str x30, [sp, #608] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x10000000000000
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: str x30, [sp, #1352] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x20000000000000
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: ldr x10, [sp, #608] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: mul x12, x0, x28
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: str x30, [sp, #592] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x20000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1360] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x40000000000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #592] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x12, [sp, #928] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: str x30, [sp, #624] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x40000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1368] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x80000000000000
+; CHECK-NEON-NEXT: ldr x9, [sp, #624] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: str x30, [sp, #616] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x80000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1376] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x100000000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #528] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x100000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1384] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x200000000000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #528] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #512] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x200000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1392] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x400000000000000
+; CHECK-NEON-NEXT: ldr x12, [sp, #512] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: mul x12, x0, x29
+; CHECK-NEON-NEXT: str x30, [sp, #544] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x400000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1400] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x800000000000000
+; CHECK-NEON-NEXT: ldr x10, [sp, #544] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x12, [sp, #920] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: ldr x11, [sp, #616] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: eor x21, x8, x9
+; CHECK-NEON-NEXT: str x30, [sp, #536] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x800000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1408] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x1000000000000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #536] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1144] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x11, x0, x11
+; CHECK-NEON-NEXT: str x30, [sp, #568] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x1000000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1416] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x2000000000000000
+; CHECK-NEON-NEXT: ldr x8, [sp, #568] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: eor x8, x10, x8
+; CHECK-NEON-NEXT: str x11, [sp, #1144] // 8-byte Spill
+; CHECK-NEON-NEXT: str x30, [sp, #600] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x2000000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1424] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x30, x1, x30
+; CHECK-NEON-NEXT: str x30, [sp, #656] // 8-byte Spill
+; CHECK-NEON-NEXT: and x30, x2, #0x4000000000000000
+; CHECK-NEON-NEXT: and x2, x2, #0x8000000000000000
+; CHECK-NEON-NEXT: str x30, [sp, #1432] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x30, x1, x2
+; CHECK-NEON-NEXT: and x1, x3, #0x2
+; CHECK-NEON-NEXT: str x2, [sp, #1440] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x1
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #488] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x4
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x8
+; CHECK-NEON-NEXT: ldp x10, x9, [sp, #488] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: str x1, [sp, #456] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x10
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x11, [sp, #456] // 8-byte Reload
+; CHECK-NEON-NEXT: stp x2, x1, [sp, #472] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x20
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x10, [sp, #472] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1152] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #480] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x11, x0, x11
+; CHECK-NEON-NEXT: str x1, [sp, #464] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x40
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x80
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x11, [sp, #1152] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x11, [sp, #600] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #464] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #424] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x100
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x1, [sp, #384] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x200
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x12, [sp, #384] // 8-byte Reload
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #440] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x400
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x800
+; CHECK-NEON-NEXT: ldr x11, [sp, #448] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #424] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #440] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1160] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: mul x12, x0, x12
+; CHECK-NEON-NEXT: str x1, [sp, #392] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x1000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x12, [sp, #1160] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x12, [sp, #656] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #376] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x2000
+; CHECK-NEON-NEXT: eor x8, x8, x12
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x12, [sp, #376] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x8, x30
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #408] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x4000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x11, [sp, #416] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #392] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #408] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1168] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #400] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x8000
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: mul x12, x0, x12
+; CHECK-NEON-NEXT: str x1, [sp, #432] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x10000
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x20000
+; CHECK-NEON-NEXT: str x12, [sp, #1168] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x1, [sp, #328] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x40000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x12, [sp, #328] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #320] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x80000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x100000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x11, [sp, #360] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #400] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #320] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #344] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x200000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1176] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x12, x0, x12
+; CHECK-NEON-NEXT: str x1, [sp, #368] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x400000
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x800000
+; CHECK-NEON-NEXT: str x12, [sp, #1176] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x12, [sp, #432] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #352] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #368] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: str x1, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x1000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x13, [sp, #232] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x2000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: stp x2, x1, [sp, #280] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x4000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x12, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #344] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #224] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #272] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x8000000
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x10000000
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #1184] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x11, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: ldr x12, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x13, x0, x13
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x20000000
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x40000000
+; CHECK-NEON-NEXT: ldr x10, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT: str x13, [sp, #1184] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #168] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x80000000
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x100000000
+; CHECK-NEON-NEXT: ldp x13, x12, [sp, #168] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #1192] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x13, x0, x13
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #200] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x200000000
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x400000000
+; CHECK-NEON-NEXT: ldr x11, [sp, #208] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: ldr x12, [sp, #304] // 8-byte Reload
+; CHECK-NEON-NEXT: str x13, [sp, #1192] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #200] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x9, x10
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1200] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x800000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x10, [sp, #216] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x12, x0, x12
+; CHECK-NEON-NEXT: stp x2, x1, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x1000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x9, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT: str x12, [sp, #1200] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x9, x11, x9
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: str x1, [sp, #336] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x2000000000
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x4000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x1, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x8000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x11, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #152] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x10000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x10, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1208] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x11, x0, x11
+; CHECK-NEON-NEXT: str x1, [sp, #144] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x20000000000
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x40000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x11, [sp, #1208] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x11, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #184] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x80000000000
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: mul x2, x0, x1
+; CHECK-NEON-NEXT: and x1, x3, #0x100000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: stp x1, x2, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x200000000000
+; CHECK-NEON-NEXT: and x2, x3, #0x1000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: mul x2, x0, x2
+; CHECK-NEON-NEXT: str x1, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x400000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x1, [sp, #40] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x800000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x11, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x2000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x12, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x2
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1216] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x4000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: mul x23, x0, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #336] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #192] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x8000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #56] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x9, [sp, #248] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #184] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x10000000000000
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x10, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1224] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: ldr x11, [sp, #240] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x24, x0, x12
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: str x1, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x20000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x11, [sp, #88] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1232] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x25, x0, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #296] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x40000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #112] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: str x1, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x80000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: str x1, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x100000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x9, [sp, #120] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #80] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x200000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x11, [sp, #80] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #32] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x400000000000000
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x12, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #104] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: str x1, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x800000000000000
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: ldr x12, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1248] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1240] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x27, x0, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1944] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #72] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x1000000000000000
+; CHECK-NEON-NEXT: mul x26, x0, x12
+; CHECK-NEON-NEXT: ldr x10, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x12, [sp, #1904] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: ldr x11, [sp, #1920] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x2000000000000000
+; CHECK-NEON-NEXT: ldr x8, [sp, #64] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x8, x10, x8
+; CHECK-NEON-NEXT: ldr x10, [sp, #1936] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: ldr x10, [sp, #1928] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x4000000000000000
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1912] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1888] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1792] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: ldr x11, [sp, #1256] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1880] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x28, x0, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #128] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #1056] // 8-byte Spill
+; CHECK-NEON-NEXT: and x1, x3, #0x8000000000000000
+; CHECK-NEON-NEXT: eor x14, x8, x11
+; CHECK-NEON-NEXT: ldr x8, [sp, #1896] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x11, [sp, #1832] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x1
+; CHECK-NEON-NEXT: eor x10, x8, x10
+; CHECK-NEON-NEXT: ldr x8, [sp, #1864] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1856] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x8, x11
+; CHECK-NEON-NEXT: ldr x8, [sp, #1264] // 8-byte Reload
+; CHECK-NEON-NEXT: str x1, [sp, #1080] // 8-byte Spill
+; CHECK-NEON-NEXT: mul x1, x0, x7
+; CHECK-NEON-NEXT: mul x29, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1872] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1840] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: str x1, [sp, #1136] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1816] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x8, x12
+; CHECK-NEON-NEXT: ldr x8, [sp, #1824] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1784] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1272] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x30, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1848] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1808] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: eor x11, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1776] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x12, [sp, #1728] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1768] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x8, x12
+; CHECK-NEON-NEXT: ldr x8, [sp, #1280] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x22, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1800] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1760] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: eor x11, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1720] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x12, [sp, #1656] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1696] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x8, x12
+; CHECK-NEON-NEXT: ldr x8, [sp, #1288] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x20, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1752] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1688] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1736] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x12, [sp, #1608] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1672] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1296] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x19, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1744] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1680] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1640] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x8, x12
+; CHECK-NEON-NEXT: ldr x8, [sp, #1664] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1584] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1304] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x7, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1704] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1632] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1600] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x12, [sp, #1520] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1568] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x8, x12
+; CHECK-NEON-NEXT: ldr x8, [sp, #1312] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x6, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1712] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1616] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x9, x10
+; CHECK-NEON-NEXT: ldr x9, [sp, #1448] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1536] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x6, x7, x6
+; CHECK-NEON-NEXT: ldr x7, [sp, #1424] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1592] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x7, x0, x7
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1512] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1320] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x5, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1624] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1560] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1504] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1480] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1328] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x4, x0, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1648] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1552] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1576] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1472] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1528] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x11, x9
+; CHECK-NEON-NEXT: ldr x11, [sp, #1456] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1488] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1336] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1544] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x3, x0, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #992] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1464] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x13, x9
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1344] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x0, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1056] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1496] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1080] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x9, x8
+; CHECK-NEON-NEXT: ldr x9, [sp, #1352] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x14, x10
+; CHECK-NEON-NEXT: eor x14, x21, x16
+; CHECK-NEON-NEXT: rbit x8, x8
+; CHECK-NEON-NEXT: mul x18, x0, x9
+; CHECK-NEON-NEXT: eor x13, x15, x13
+; CHECK-NEON-NEXT: ldr x9, [sp, #1360] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x10, [sp, #1112] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x17, x0, x9
+; CHECK-NEON-NEXT: eor x2, x13, x8, lsr #1
+; CHECK-NEON-NEXT: ldr x8, [sp, #1136] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x9, [sp, #1128] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x9, x8
+; CHECK-NEON-NEXT: ldr x9, [sp, #1120] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x9, x10
+; CHECK-NEON-NEXT: ldr x9, [sp, #1096] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x10, [sp, #1072] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x8, x13
+; CHECK-NEON-NEXT: ldr x8, [sp, #1104] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x9, x10
+; CHECK-NEON-NEXT: ldr x9, [sp, #1368] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x10, [sp, #984] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1064] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x21, x0, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1048] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1040] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #1000] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1376] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x16, x0, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1088] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1024] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x13, x8
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1016] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1008] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x10, x11
+; CHECK-NEON-NEXT: ldr x10, [sp, #1384] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x11, [sp, #936] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x15, x0, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1032] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #976] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x14, x10
+; CHECK-NEON-NEXT: eor x10, x8, x9
+; CHECK-NEON-NEXT: ldr x8, [sp, #968] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x13, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #1392] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x13, x0, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #960] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #944] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x8, x11
+; CHECK-NEON-NEXT: ldr x8, [sp, #1168] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x11, [sp, #1176] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x15, x13
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #928] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1184] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1400] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x12, x0, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #952] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #920] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: ldr x10, [sp, #1152] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1192] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x13, x12
+; CHECK-NEON-NEXT: ldr x13, [sp, #1440] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1144] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x13, x0, x13
+; CHECK-NEON-NEXT: eor x14, x14, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1200] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x14, x10
+; CHECK-NEON-NEXT: ldr x14, [sp, #1208] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1408] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1160] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x23
+; CHECK-NEON-NEXT: mul x11, x0, x11
+; CHECK-NEON-NEXT: eor x10, x10, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1416] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x24
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: eor x10, x25, x26
+; CHECK-NEON-NEXT: eor x10, x10, x27
+; CHECK-NEON-NEXT: eor x8, x9, x8
+; CHECK-NEON-NEXT: mul x14, x0, x14
+; CHECK-NEON-NEXT: eor x9, x10, x28
+; CHECK-NEON-NEXT: eor x10, x6, x5
+; CHECK-NEON-NEXT: eor x10, x10, x4
+; CHECK-NEON-NEXT: ldr x4, [sp, #1432] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x29
+; CHECK-NEON-NEXT: eor x10, x10, x3
+; CHECK-NEON-NEXT: eor x9, x9, x30
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: mul x4, x0, x4
+; CHECK-NEON-NEXT: eor x10, x10, x1
+; CHECK-NEON-NEXT: eor x9, x9, x22
+; CHECK-NEON-NEXT: eor x10, x10, x18
+; CHECK-NEON-NEXT: eor x9, x9, x20
+; CHECK-NEON-NEXT: eor x11, x11, x14
+; CHECK-NEON-NEXT: eor x10, x10, x17
+; CHECK-NEON-NEXT: eor x9, x9, x19
+; CHECK-NEON-NEXT: eor x11, x11, x7
+; CHECK-NEON-NEXT: eor x10, x10, x21
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: mov x1, x2
+; CHECK-NEON-NEXT: eor x9, x10, x16
+; CHECK-NEON-NEXT: eor x10, x11, x4
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x10, x13
+; CHECK-NEON-NEXT: eor x0, x8, x9
+; CHECK-NEON-NEXT: add sp, sp, #1952
+; CHECK-NEON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_v1i128_neon:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: rbit x8, x2
+; CHECK-AES-NEXT: rbit x9, x0
+; CHECK-AES-NEXT: fmov d0, x2
+; CHECK-AES-NEXT: fmov d1, x1
+; CHECK-AES-NEXT: fmov d2, x3
+; CHECK-AES-NEXT: fmov d3, x8
+; CHECK-AES-NEXT: fmov d4, x9
+; CHECK-AES-NEXT: pmull v1.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: pmull v3.1q, v4.1d, v3.1d
+; CHECK-AES-NEXT: fmov d4, x0
+; CHECK-AES-NEXT: pmull v2.1q, v4.1d, v2.1d
+; CHECK-AES-NEXT: fmov x9, d1
+; CHECK-AES-NEXT: fmov x8, d3
+; CHECK-AES-NEXT: pmull v0.1q, v4.1d, v0.1d
+; CHECK-AES-NEXT: fmov x10, d2
+; CHECK-AES-NEXT: rbit x8, x8
+; CHECK-AES-NEXT: fmov x0, d0
+; CHECK-AES-NEXT: eor x9, x10, x9
+; CHECK-AES-NEXT: eor x1, x9, x8, lsr #1
+; CHECK-AES-NEXT: ret
%a = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %x, <1 x i128> %y)
ret <1 x i128> %a
}
@@ -2721,562 +2744,580 @@ define <4 x i64> @clmul_v4i64_neon_zext(<4 x i32> %x, <4 x i32> %y) {
}
define <1 x i128> @clmul_v1i128_neon_zext(<1 x i64> %x, <1 x i64> %y) {
-; CHECK-LABEL: clmul_v1i128_neon_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: sub sp, sp, #624
-; CHECK-NEXT: .cfi_def_cfa_offset 720
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w26, -64
-; CHECK-NEXT: .cfi_offset w27, -72
-; CHECK-NEXT: .cfi_offset w28, -80
-; CHECK-NEXT: .cfi_offset w30, -88
-; CHECK-NEXT: .cfi_offset w29, -96
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: fmov x10, d1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: and x8, x10, #0x2
-; CHECK-NEXT: mul x13, x11, x8
-; CHECK-NEXT: and x8, x10, #0x1
-; CHECK-NEXT: mul x14, x11, x8
-; CHECK-NEXT: and x8, x10, #0x4
-; CHECK-NEXT: mul x15, x11, x8
-; CHECK-NEXT: and x8, x10, #0x8
-; CHECK-NEXT: mul x16, x11, x8
-; CHECK-NEXT: and x8, x10, #0x10
-; CHECK-NEXT: mul x17, x11, x8
-; CHECK-NEXT: and x8, x10, #0x20
-; CHECK-NEXT: mul x18, x11, x8
-; CHECK-NEXT: and x8, x10, #0x40
-; CHECK-NEXT: mul x0, x11, x8
-; CHECK-NEXT: and x8, x10, #0x80
-; CHECK-NEXT: mul x1, x11, x8
-; CHECK-NEXT: and x8, x10, #0x100
-; CHECK-NEXT: mul x3, x11, x8
-; CHECK-NEXT: and x8, x10, #0x200
-; CHECK-NEXT: mul x2, x11, x8
-; CHECK-NEXT: and x8, x10, #0x400
-; CHECK-NEXT: mul x4, x11, x8
-; CHECK-NEXT: and x8, x10, #0x800
-; CHECK-NEXT: mul x5, x11, x8
-; CHECK-NEXT: and x8, x10, #0x1000
-; CHECK-NEXT: mul x20, x11, x8
-; CHECK-NEXT: and x8, x10, #0x2000
-; CHECK-NEXT: mul x6, x11, x8
-; CHECK-NEXT: and x8, x10, #0x4000
-; CHECK-NEXT: mul x7, x11, x8
-; CHECK-NEXT: and x8, x10, #0x8000
-; CHECK-NEXT: mul x19, x11, x8
-; CHECK-NEXT: and x8, x10, #0x10000
-; CHECK-NEXT: mul x21, x11, x8
-; CHECK-NEXT: and x8, x10, #0x20000
-; CHECK-NEXT: mul x22, x11, x8
-; CHECK-NEXT: and x8, x10, #0x40000
-; CHECK-NEXT: mul x23, x11, x8
-; CHECK-NEXT: and x8, x10, #0x80000
-; CHECK-NEXT: mul x24, x11, x8
-; CHECK-NEXT: and x8, x10, #0x100000
-; CHECK-NEXT: mul x25, x11, x8
-; CHECK-NEXT: and x8, x10, #0x200000
-; CHECK-NEXT: mul x26, x11, x8
-; CHECK-NEXT: and x8, x10, #0x400000
-; CHECK-NEXT: mul x27, x11, x8
-; CHECK-NEXT: and x8, x10, #0x800000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #592] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x1000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #584] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x2000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #616] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x4000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #576] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x8000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #608] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x10000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #600] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x20000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #568] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x40000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #512] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x80000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #536] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x100000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #528] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x200000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #560] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x400000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #520] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x800000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #552] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x1000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #544] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x2000000000
-; CHECK-NEXT: mul x9, x11, x8
-; CHECK-NEXT: and x8, x10, #0x4000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #440] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x8000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #464] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x10000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #456] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x20000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: stp x8, x9, [sp, #488] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x10, #0x40000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #448] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x80000000000
-; CHECK-NEXT: mul x9, x11, x8
-; CHECK-NEXT: and x8, x10, #0x100000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: stp x8, x9, [sp, #472] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x10, #0x200000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #504] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x400000000000
-; CHECK-NEXT: mul x9, x11, x8
-; CHECK-NEXT: and x8, x10, #0x800000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #392] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x1000000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #416] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x2000000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #408] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x4000000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: stp x8, x9, [sp, #424] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x10, #0x8000000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: str x8, [sp, #400] // 8-byte Spill
-; CHECK-NEXT: and x8, x10, #0x100000000000000
-; CHECK-NEXT: mul x9, x11, x8
-; CHECK-NEXT: and x8, x10, #0x200000000000000
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: stp x8, x9, [sp, #376] // 16-byte Folded Spill
-; CHECK-NEXT: and x9, x10, #0x400000000000000
-; CHECK-NEXT: rbit x8, x10
-; CHECK-NEXT: mul x9, x11, x9
-; CHECK-NEXT: and x12, x8, #0x2
-; CHECK-NEXT: str x9, [sp, #368] // 8-byte Spill
-; CHECK-NEXT: rbit x9, x11
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #360] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x1
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #352] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x4
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #344] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x8
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #336] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x10
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #328] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x20
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #320] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x40
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #312] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x80
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #304] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x100
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #296] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x200
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #288] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x400
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #280] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x800
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #272] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x1000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #256] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x2000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #248] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x4000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #264] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x8000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #240] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x10000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #232] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x20000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #200] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x40000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #224] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x80000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #192] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x100000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #216] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x200000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #208] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x400000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #184] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x800000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #136] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x1000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #168] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x2000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #160] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x4000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #176] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x8000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #152] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x10000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #144] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x20000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #128] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x40000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #120] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x80000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #112] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x100000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #104] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x200000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #96] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x400000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #88] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x800000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #80] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x1000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #72] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x2000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #64] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x4000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #56] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x8000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #48] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x10000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #40] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x20000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #32] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x40000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x80000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: str x12, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: eor x12, x14, x13
-; CHECK-NEXT: and x14, x8, #0x100000000000
-; CHECK-NEXT: mul x14, x9, x14
-; CHECK-NEXT: eor x13, x15, x16
-; CHECK-NEXT: and x15, x8, #0x200000000000
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: eor x13, x17, x18
-; CHECK-NEXT: ldr x16, [sp, #608] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x0
-; CHECK-NEXT: mul x30, x9, x15
-; CHECK-NEXT: and x15, x8, #0x400000000000
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: mul x29, x9, x15
-; CHECK-NEXT: and x15, x8, #0x800000000000
-; CHECK-NEXT: str x14, [sp, #8] // 8-byte Spill
-; CHECK-NEXT: eor x14, x1, x3
-; CHECK-NEXT: eor x13, x14, x2
-; CHECK-NEXT: eor x14, x5, x20
-; CHECK-NEXT: mul x28, x9, x15
-; CHECK-NEXT: eor x13, x13, x4
-; CHECK-NEXT: ldr x15, [sp, #592] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: eor x13, x14, x6
-; CHECK-NEXT: eor x14, x21, x22
-; CHECK-NEXT: eor x13, x13, x7
-; CHECK-NEXT: eor x14, x14, x23
-; CHECK-NEXT: eor x15, x27, x15
-; CHECK-NEXT: eor x13, x13, x19
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: eor x13, x14, x24
-; CHECK-NEXT: and x14, x8, #0x1000000000000
-; CHECK-NEXT: eor x13, x13, x25
-; CHECK-NEXT: mul x27, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #584] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x26
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #616] // 8-byte Reload
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: and x15, x8, #0x2000000000000
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: ldr x14, [sp, #576] // 8-byte Reload
-; CHECK-NEXT: mul x25, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #512] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #568] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #536] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x4000000000000
-; CHECK-NEXT: mul x24, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #600] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: ldr x16, [sp, #552] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #528] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #560] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x8000000000000
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: ldr x14, [sp, #520] // 8-byte Reload
-; CHECK-NEXT: mul x23, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #440] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #496] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #464] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x10000000000000
-; CHECK-NEXT: mul x21, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #544] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: ldr x16, [sp, #256] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #456] // 8-byte Reload
-; CHECK-NEXT: eor x26, x12, x13
-; CHECK-NEXT: ldr x12, [sp, #488] // 8-byte Reload
-; CHECK-NEXT: ldr x13, [sp, #448] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x20000000000000
-; CHECK-NEXT: eor x12, x14, x12
-; CHECK-NEXT: mul x20, x9, x15
-; CHECK-NEXT: ldr x14, [sp, #392] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #432] // 8-byte Reload
-; CHECK-NEXT: ldr x15, [sp, #480] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: and x14, x8, #0x40000000000000
-; CHECK-NEXT: eor x12, x12, x15
-; CHECK-NEXT: ldr x15, [sp, #416] // 8-byte Reload
-; CHECK-NEXT: mul x7, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #472] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #504] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x14
-; CHECK-NEXT: ldr x14, [sp, #408] // 8-byte Reload
-; CHECK-NEXT: eor x22, x12, x15
-; CHECK-NEXT: ldr x12, [sp, #424] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: and x14, x8, #0x80000000000000
-; CHECK-NEXT: eor x12, x13, x12
-; CHECK-NEXT: ldr x13, [sp, #400] // 8-byte Reload
-; CHECK-NEXT: mul x5, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #368] // 8-byte Reload
-; CHECK-NEXT: eor x19, x12, x13
-; CHECK-NEXT: ldp x13, x12, [sp, #376] // 16-byte Folded Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: and x13, x8, #0x100000000000000
-; CHECK-NEXT: eor x6, x12, x14
-; CHECK-NEXT: ldp x14, x12, [sp, #352] // 16-byte Folded Reload
-; CHECK-NEXT: mul x4, x9, x13
-; CHECK-NEXT: eor x12, x14, x12
-; CHECK-NEXT: ldp x14, x13, [sp, #336] // 16-byte Folded Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldp x15, x14, [sp, #320] // 16-byte Folded Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #312] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x200000000000000
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: mul x3, x9, x15
-; CHECK-NEXT: and x15, x8, #0x400000000000000
-; CHECK-NEXT: eor x14, x12, x13
-; CHECK-NEXT: ldp x13, x12, [sp, #296] // 16-byte Folded Reload
-; CHECK-NEXT: mul x2, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #280] // 8-byte Reload
-; CHECK-NEXT: eor x13, x12, x13
-; CHECK-NEXT: ldr x12, [sp, #288] // 8-byte Reload
-; CHECK-NEXT: eor x3, x4, x3
-; CHECK-NEXT: and x4, x10, #0x2000000000000000
-; CHECK-NEXT: eor x13, x13, x12
-; CHECK-NEXT: ldr x12, [sp, #272] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #248] // 8-byte Reload
-; CHECK-NEXT: eor x2, x3, x2
-; CHECK-NEXT: eor x12, x12, x16
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: ldr x14, [sp, #264] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x15
-; CHECK-NEXT: and x15, x8, #0x800000000000000
-; CHECK-NEXT: mul x3, x11, x4
-; CHECK-NEXT: eor x14, x12, x14
-; CHECK-NEXT: mul x1, x9, x15
-; CHECK-NEXT: ldp x12, x15, [sp, #232] // 16-byte Folded Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: ldr x15, [sp, #200] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldp x16, x14, [sp, #216] // 16-byte Folded Reload
-; CHECK-NEXT: eor x15, x12, x15
-; CHECK-NEXT: and x12, x8, #0x1000000000000000
-; CHECK-NEXT: eor x1, x2, x1
-; CHECK-NEXT: mul x0, x9, x12
-; CHECK-NEXT: and x2, x10, #0x4000000000000000
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: ldp x12, x15, [sp, #184] // 16-byte Folded Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: ldr x15, [sp, #136] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: ldr x16, [sp, #168] // 8-byte Reload
-; CHECK-NEXT: eor x0, x1, x0
-; CHECK-NEXT: eor x15, x12, x15
-; CHECK-NEXT: and x12, x8, #0x2000000000000000
-; CHECK-NEXT: and x8, x8, #0x4000000000000000
-; CHECK-NEXT: mul x18, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #208] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: eor x14, x14, x12
-; CHECK-NEXT: ldr x12, [sp, #160] // 8-byte Reload
-; CHECK-NEXT: mul x17, x9, x8
-; CHECK-NEXT: ldr x8, [sp, #152] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: eor x15, x15, x12
-; CHECK-NEXT: ldr x12, [sp, #176] // 8-byte Reload
-; CHECK-NEXT: mul x1, x11, x2
-; CHECK-NEXT: eor x18, x0, x18
-; CHECK-NEXT: eor x14, x15, x12
-; CHECK-NEXT: and x15, x10, #0x10000000000000
-; CHECK-NEXT: eor x9, x14, x8
-; CHECK-NEXT: ldp x12, x8, [sp, #120] // 16-byte Folded Reload
-; CHECK-NEXT: mul x16, x11, x15
-; CHECK-NEXT: eor x14, x8, x12
-; CHECK-NEXT: ldr x8, [sp, #144] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x8
-; CHECK-NEXT: ldr x8, [sp, #112] // 8-byte Reload
-; CHECK-NEXT: eor x9, x13, x9
-; CHECK-NEXT: eor x14, x14, x8
-; CHECK-NEXT: ldr x8, [sp, #104] // 8-byte Reload
-; CHECK-NEXT: eor x13, x14, x8
-; CHECK-NEXT: ldr x8, [sp, #96] // 8-byte Reload
-; CHECK-NEXT: and x14, x10, #0x20000000000000
-; CHECK-NEXT: mul x15, x11, x14
-; CHECK-NEXT: eor x13, x13, x8
-; CHECK-NEXT: ldp x12, x8, [sp, #56] // 16-byte Folded Reload
-; CHECK-NEXT: eor x8, x8, x12
-; CHECK-NEXT: ldp x14, x12, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: eor x13, x13, x12
-; CHECK-NEXT: ldr x12, [sp, #48] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #40] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x12
-; CHECK-NEXT: and x12, x10, #0x40000000000000
-; CHECK-NEXT: eor x8, x8, x14
-; CHECK-NEXT: mul x14, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #72] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x12
-; CHECK-NEXT: ldr x12, [sp, #32] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x13
-; CHECK-NEXT: ldr x13, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x12
-; CHECK-NEXT: and x12, x10, #0x80000000000000
-; CHECK-NEXT: eor x8, x8, x13
-; CHECK-NEXT: ldr x13, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: mul x12, x11, x12
-; CHECK-NEXT: eor x8, x8, x13
-; CHECK-NEXT: eor x13, x29, x28
-; CHECK-NEXT: ldr x29, [sp, #8] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x27
-; CHECK-NEXT: and x28, x10, #0x800000000000000
-; CHECK-NEXT: eor x8, x8, x29
-; CHECK-NEXT: eor x13, x13, x25
-; CHECK-NEXT: mul x27, x11, x28
-; CHECK-NEXT: eor x8, x8, x30
-; CHECK-NEXT: and x25, x10, #0x1000000000000000
-; CHECK-NEXT: and x10, x10, #0x8000000000000000
-; CHECK-NEXT: eor x8, x9, x8
-; CHECK-NEXT: eor x9, x13, x24
-; CHECK-NEXT: mul x13, x11, x25
-; CHECK-NEXT: eor x9, x9, x23
-; CHECK-NEXT: eor x9, x9, x21
-; CHECK-NEXT: mul x10, x11, x10
-; CHECK-NEXT: eor x11, x19, x16
-; CHECK-NEXT: eor x9, x9, x20
-; CHECK-NEXT: eor x16, x6, x27
-; CHECK-NEXT: eor x9, x9, x7
-; CHECK-NEXT: eor x9, x9, x5
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x18, x17
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x11, x15
-; CHECK-NEXT: eor x11, x16, x13
-; CHECK-NEXT: rbit x8, x8
-; CHECK-NEXT: eor x9, x9, x14
-; CHECK-NEXT: eor x11, x11, x3
-; CHECK-NEXT: eor x13, x26, x22
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: eor x11, x11, x1
-; CHECK-NEXT: lsr x1, x8, #1
-; CHECK-NEXT: eor x8, x13, x9
-; CHECK-NEXT: eor x9, x11, x10
-; CHECK-NEXT: eor x0, x8, x9
-; CHECK-NEXT: add sp, sp, #624
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_v1i128_neon_zext:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: sub sp, sp, #624
+; CHECK-NEON-NEXT: .cfi_def_cfa_offset 720
+; CHECK-NEON-NEXT: .cfi_offset w19, -8
+; CHECK-NEON-NEXT: .cfi_offset w20, -16
+; CHECK-NEON-NEXT: .cfi_offset w21, -24
+; CHECK-NEON-NEXT: .cfi_offset w22, -32
+; CHECK-NEON-NEXT: .cfi_offset w23, -40
+; CHECK-NEON-NEXT: .cfi_offset w24, -48
+; CHECK-NEON-NEXT: .cfi_offset w25, -56
+; CHECK-NEON-NEXT: .cfi_offset w26, -64
+; CHECK-NEON-NEXT: .cfi_offset w27, -72
+; CHECK-NEON-NEXT: .cfi_offset w28, -80
+; CHECK-NEON-NEXT: .cfi_offset w30, -88
+; CHECK-NEON-NEXT: .cfi_offset w29, -96
+; CHECK-NEON-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEON-NEXT: fmov x10, d1
+; CHECK-NEON-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEON-NEXT: fmov x11, d0
+; CHECK-NEON-NEXT: and x8, x10, #0x2
+; CHECK-NEON-NEXT: mul x13, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x1
+; CHECK-NEON-NEXT: mul x14, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x4
+; CHECK-NEON-NEXT: mul x15, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x8
+; CHECK-NEON-NEXT: mul x16, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x10
+; CHECK-NEON-NEXT: mul x17, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x20
+; CHECK-NEON-NEXT: mul x18, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x40
+; CHECK-NEON-NEXT: mul x0, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x80
+; CHECK-NEON-NEXT: mul x1, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x100
+; CHECK-NEON-NEXT: mul x3, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x200
+; CHECK-NEON-NEXT: mul x2, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x400
+; CHECK-NEON-NEXT: mul x4, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x800
+; CHECK-NEON-NEXT: mul x5, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x1000
+; CHECK-NEON-NEXT: mul x20, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x2000
+; CHECK-NEON-NEXT: mul x6, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x4000
+; CHECK-NEON-NEXT: mul x7, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x8000
+; CHECK-NEON-NEXT: mul x19, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x10000
+; CHECK-NEON-NEXT: mul x21, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x20000
+; CHECK-NEON-NEXT: mul x22, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x40000
+; CHECK-NEON-NEXT: mul x23, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x80000
+; CHECK-NEON-NEXT: mul x24, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x100000
+; CHECK-NEON-NEXT: mul x25, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x200000
+; CHECK-NEON-NEXT: mul x26, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x400000
+; CHECK-NEON-NEXT: mul x27, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x800000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #592] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x1000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #584] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x2000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #616] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x4000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #576] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x8000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #608] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x10000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #600] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x20000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #568] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x40000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #512] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x80000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #536] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x100000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #528] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x200000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #560] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x400000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #520] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x800000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #552] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x1000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #544] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x2000000000
+; CHECK-NEON-NEXT: mul x9, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x4000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #440] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x8000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #464] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x10000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #456] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x20000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #488] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x40000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #448] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x80000000000
+; CHECK-NEON-NEXT: mul x9, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x100000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #472] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x200000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #504] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x400000000000
+; CHECK-NEON-NEXT: mul x9, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x800000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #392] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x1000000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #416] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x2000000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #408] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x4000000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #424] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x8000000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: str x8, [sp, #400] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x10, #0x100000000000000
+; CHECK-NEON-NEXT: mul x9, x11, x8
+; CHECK-NEON-NEXT: and x8, x10, #0x200000000000000
+; CHECK-NEON-NEXT: mul x8, x11, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #376] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x9, x10, #0x400000000000000
+; CHECK-NEON-NEXT: rbit x8, x10
+; CHECK-NEON-NEXT: mul x9, x11, x9
+; CHECK-NEON-NEXT: and x12, x8, #0x2
+; CHECK-NEON-NEXT: str x9, [sp, #368] // 8-byte Spill
+; CHECK-NEON-NEXT: rbit x9, x11
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #360] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x1
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #352] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x4
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #344] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x8
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #336] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x10
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #328] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x20
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #320] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x40
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #312] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x80
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #304] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x100
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x200
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #288] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x400
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #280] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x800
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #272] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x1000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #256] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x2000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #248] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x4000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x8000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #240] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x10000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x20000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #200] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x40000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x80000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #192] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x100000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x200000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #208] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x400000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #184] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x800000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x1000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #168] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x2000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #160] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x4000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #176] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x8000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #152] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x10000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #144] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x20000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x40000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x80000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x100000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x200000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x400000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x800000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #80] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x1000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #72] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x2000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x4000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x8000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x10000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #40] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x20000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #32] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x40000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x80000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: str x12, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x12, x14, x13
+; CHECK-NEON-NEXT: and x14, x8, #0x100000000000
+; CHECK-NEON-NEXT: mul x14, x9, x14
+; CHECK-NEON-NEXT: eor x13, x15, x16
+; CHECK-NEON-NEXT: and x15, x8, #0x200000000000
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: eor x13, x17, x18
+; CHECK-NEON-NEXT: ldr x16, [sp, #608] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x0
+; CHECK-NEON-NEXT: mul x30, x9, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x400000000000
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: mul x29, x9, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x800000000000
+; CHECK-NEON-NEXT: str x14, [sp, #8] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x14, x1, x3
+; CHECK-NEON-NEXT: eor x13, x14, x2
+; CHECK-NEON-NEXT: eor x14, x5, x20
+; CHECK-NEON-NEXT: mul x28, x9, x15
+; CHECK-NEON-NEXT: eor x13, x13, x4
+; CHECK-NEON-NEXT: ldr x15, [sp, #592] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: eor x13, x14, x6
+; CHECK-NEON-NEXT: eor x14, x21, x22
+; CHECK-NEON-NEXT: eor x13, x13, x7
+; CHECK-NEON-NEXT: eor x14, x14, x23
+; CHECK-NEON-NEXT: eor x15, x27, x15
+; CHECK-NEON-NEXT: eor x13, x13, x19
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: eor x13, x14, x24
+; CHECK-NEON-NEXT: and x14, x8, #0x1000000000000
+; CHECK-NEON-NEXT: eor x13, x13, x25
+; CHECK-NEON-NEXT: mul x27, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #584] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x26
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #616] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: and x15, x8, #0x2000000000000
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: ldr x14, [sp, #576] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x25, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #512] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #568] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #536] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x4000000000000
+; CHECK-NEON-NEXT: mul x24, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #600] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #552] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #528] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #560] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x8000000000000
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: ldr x14, [sp, #520] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x23, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #440] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #496] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #464] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x10000000000000
+; CHECK-NEON-NEXT: mul x21, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #544] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #456] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x26, x12, x13
+; CHECK-NEON-NEXT: ldr x12, [sp, #488] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x13, [sp, #448] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x20000000000000
+; CHECK-NEON-NEXT: eor x12, x14, x12
+; CHECK-NEON-NEXT: mul x20, x9, x15
+; CHECK-NEON-NEXT: ldr x14, [sp, #392] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #432] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x15, [sp, #480] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: and x14, x8, #0x40000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #416] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x7, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #472] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #504] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #408] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x22, x12, x15
+; CHECK-NEON-NEXT: ldr x12, [sp, #424] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: and x14, x8, #0x80000000000000
+; CHECK-NEON-NEXT: eor x12, x13, x12
+; CHECK-NEON-NEXT: ldr x13, [sp, #400] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x5, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #368] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x19, x12, x13
+; CHECK-NEON-NEXT: ldp x13, x12, [sp, #376] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x100000000000000
+; CHECK-NEON-NEXT: eor x6, x12, x14
+; CHECK-NEON-NEXT: ldp x14, x12, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x4, x9, x13
+; CHECK-NEON-NEXT: eor x12, x14, x12
+; CHECK-NEON-NEXT: ldp x14, x13, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldp x15, x14, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x200000000000000
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: mul x3, x9, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x400000000000000
+; CHECK-NEON-NEXT: eor x14, x12, x13
+; CHECK-NEON-NEXT: ldp x13, x12, [sp, #296] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x2, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x12, x13
+; CHECK-NEON-NEXT: ldr x12, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x3, x4, x3
+; CHECK-NEON-NEXT: and x4, x10, #0x2000000000000000
+; CHECK-NEON-NEXT: eor x13, x13, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #248] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x2, x3, x2
+; CHECK-NEON-NEXT: eor x12, x12, x16
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: ldr x14, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x800000000000000
+; CHECK-NEON-NEXT: mul x3, x11, x4
+; CHECK-NEON-NEXT: eor x14, x12, x14
+; CHECK-NEON-NEXT: mul x1, x9, x15
+; CHECK-NEON-NEXT: ldp x12, x15, [sp, #232] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #200] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldp x16, x14, [sp, #216] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x15, x12, x15
+; CHECK-NEON-NEXT: and x12, x8, #0x1000000000000000
+; CHECK-NEON-NEXT: eor x1, x2, x1
+; CHECK-NEON-NEXT: mul x0, x9, x12
+; CHECK-NEON-NEXT: and x2, x10, #0x4000000000000000
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: ldp x12, x15, [sp, #184] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #168] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x1, x0
+; CHECK-NEON-NEXT: eor x15, x12, x15
+; CHECK-NEON-NEXT: and x12, x8, #0x2000000000000000
+; CHECK-NEON-NEXT: and x8, x8, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x18, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #208] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: eor x14, x14, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x17, x9, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: eor x15, x15, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #176] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x11, x2
+; CHECK-NEON-NEXT: eor x18, x0, x18
+; CHECK-NEON-NEXT: eor x14, x15, x12
+; CHECK-NEON-NEXT: and x15, x10, #0x10000000000000
+; CHECK-NEON-NEXT: eor x9, x14, x8
+; CHECK-NEON-NEXT: ldp x12, x8, [sp, #120] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x16, x11, x15
+; CHECK-NEON-NEXT: eor x14, x8, x12
+; CHECK-NEON-NEXT: ldr x8, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #112] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x13, x9
+; CHECK-NEON-NEXT: eor x14, x14, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #104] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x14, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT: and x14, x10, #0x20000000000000
+; CHECK-NEON-NEXT: mul x15, x11, x14
+; CHECK-NEON-NEXT: eor x13, x13, x8
+; CHECK-NEON-NEXT: ldp x12, x8, [sp, #56] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x8, x8, x12
+; CHECK-NEON-NEXT: ldp x14, x12, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x13, x13, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x12
+; CHECK-NEON-NEXT: and x12, x10, #0x40000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x14
+; CHECK-NEON-NEXT: mul x14, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x12
+; CHECK-NEON-NEXT: and x12, x10, #0x80000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x12, x11, x12
+; CHECK-NEON-NEXT: eor x8, x8, x13
+; CHECK-NEON-NEXT: eor x13, x29, x28
+; CHECK-NEON-NEXT: ldr x29, [sp, #8] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x27
+; CHECK-NEON-NEXT: and x28, x10, #0x800000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x29
+; CHECK-NEON-NEXT: eor x13, x13, x25
+; CHECK-NEON-NEXT: mul x27, x11, x28
+; CHECK-NEON-NEXT: eor x8, x8, x30
+; CHECK-NEON-NEXT: and x25, x10, #0x1000000000000000
+; CHECK-NEON-NEXT: and x10, x10, #0x8000000000000000
+; CHECK-NEON-NEXT: eor x8, x9, x8
+; CHECK-NEON-NEXT: eor x9, x13, x24
+; CHECK-NEON-NEXT: mul x13, x11, x25
+; CHECK-NEON-NEXT: eor x9, x9, x23
+; CHECK-NEON-NEXT: eor x9, x9, x21
+; CHECK-NEON-NEXT: mul x10, x11, x10
+; CHECK-NEON-NEXT: eor x11, x19, x16
+; CHECK-NEON-NEXT: eor x9, x9, x20
+; CHECK-NEON-NEXT: eor x16, x6, x27
+; CHECK-NEON-NEXT: eor x9, x9, x7
+; CHECK-NEON-NEXT: eor x9, x9, x5
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x18, x17
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x11, x15
+; CHECK-NEON-NEXT: eor x11, x16, x13
+; CHECK-NEON-NEXT: rbit x8, x8
+; CHECK-NEON-NEXT: eor x9, x9, x14
+; CHECK-NEON-NEXT: eor x11, x11, x3
+; CHECK-NEON-NEXT: eor x13, x26, x22
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: eor x11, x11, x1
+; CHECK-NEON-NEXT: lsr x1, x8, #1
+; CHECK-NEON-NEXT: eor x8, x13, x9
+; CHECK-NEON-NEXT: eor x9, x11, x10
+; CHECK-NEON-NEXT: eor x0, x8, x9
+; CHECK-NEON-NEXT: add sp, sp, #624
+; CHECK-NEON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_v1i128_neon_zext:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-AES-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-AES-NEXT: fmov x8, d0
+; CHECK-AES-NEXT: fmov x9, d1
+; CHECK-AES-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT: rbit x9, x9
+; CHECK-AES-NEXT: rbit x8, x8
+; CHECK-AES-NEXT: fmov d2, x9
+; CHECK-AES-NEXT: fmov d3, x8
+; CHECK-AES-NEXT: fmov x0, d0
+; CHECK-AES-NEXT: pmull v2.1q, v3.1d, v2.1d
+; CHECK-AES-NEXT: fmov x8, d2
+; CHECK-AES-NEXT: rbit x8, x8
+; CHECK-AES-NEXT: lsr x1, x8, #1
+; CHECK-AES-NEXT: ret
%zextx = zext <1 x i64> %x to <1 x i128>
%zexty = zext <1 x i64> %y to <1 x i128>
%a = call <1 x i128> @llvm.clmul.v2i128(<1 x i128> %zextx, <1 x i128> %zexty)
@@ -3284,1205 +3325,1235 @@ define <1 x i128> @clmul_v1i128_neon_zext(<1 x i64> %x, <1 x i64> %y) {
}
define <2 x i128> @clmul_v2i128_neon_zext(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: clmul_v2i128_neon_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: sub sp, sp, #1376
-; CHECK-NEXT: .cfi_def_cfa_offset 1472
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w26, -64
-; CHECK-NEXT: .cfi_offset w27, -72
-; CHECK-NEXT: .cfi_offset w28, -80
-; CHECK-NEXT: .cfi_offset w30, -88
-; CHECK-NEXT: .cfi_offset w29, -96
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: and x10, x9, #0x2
-; CHECK-NEXT: mul x0, x8, x10
-; CHECK-NEXT: and x10, x9, #0x1
-; CHECK-NEXT: mul x5, x8, x10
-; CHECK-NEXT: and x10, x9, #0x4
-; CHECK-NEXT: mul x7, x8, x10
-; CHECK-NEXT: and x10, x9, #0x8
-; CHECK-NEXT: mul x24, x8, x10
-; CHECK-NEXT: and x10, x9, #0x10
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: eor x5, x7, x24
-; CHECK-NEXT: str x10, [sp, #1368] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1360] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1352] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: ldr x7, [sp, #1352] // 8-byte Reload
-; CHECK-NEXT: str x10, [sp, #1344] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1328] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: ldr x24, [sp, #1328] // 8-byte Reload
-; CHECK-NEXT: str x10, [sp, #1320] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1336] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1312] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1304] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1296] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1288] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1280] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1272] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1248] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1240] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1264] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1232] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1256] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1216] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1176] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1208] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1200] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1192] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1184] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1224] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1168] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1120] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1112] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1160] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1152] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1136] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1128] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1144] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1104] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1048] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1040] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1080] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1072] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1064] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1056] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1096] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1088] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1008] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #968] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #960] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #992] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #984] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1000] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #976] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1032] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1024] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #1016] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #944] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #904] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #936] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #928] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #920] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #912] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #952] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000000000000000
-; CHECK-NEXT: rbit x9, x9
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: rbit x8, x8
-; CHECK-NEXT: str x10, [sp, #448] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #896] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #888] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #880] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #872] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #864] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #856] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #848] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #840] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #832] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #824] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #816] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #808] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #800] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #792] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #784] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #776] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #768] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #744] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #736] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #760] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #728] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #752] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #720] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #672] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #704] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #696] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #688] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #680] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #712] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #664] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #616] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #608] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #632] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #624] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #656] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #648] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #640] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x2000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #600] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x4000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #576] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x8000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #568] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x10000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #560] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #552] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x40000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #544] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #536] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #592] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x200000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #584] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #520] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000000000
-; CHECK-NEXT: mul x11, x8, x10
-; CHECK-NEXT: and x10, x9, #0x1000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: stp x10, x11, [sp, #456] // 16-byte Folded Spill
-; CHECK-NEXT: and x10, x9, #0x2000000000000
-; CHECK-NEXT: mul x11, x8, x10
-; CHECK-NEXT: and x10, x9, #0x4000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: stp x10, x11, [sp, #480] // 16-byte Folded Spill
-; CHECK-NEXT: and x10, x9, #0x8000000000000
-; CHECK-NEXT: mul x11, x8, x10
-; CHECK-NEXT: and x10, x9, #0x10000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #472] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x20000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: stp x10, x11, [sp, #504] // 16-byte Folded Spill
-; CHECK-NEXT: and x10, x9, #0x40000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #496] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x80000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #528] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x100000000000000
-; CHECK-NEXT: mul x11, x8, x10
-; CHECK-NEXT: and x10, x9, #0x200000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #400] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x400000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #424] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x800000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: str x10, [sp, #416] // 8-byte Spill
-; CHECK-NEXT: and x10, x9, #0x1000000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: stp x10, x11, [sp, #432] // 16-byte Folded Spill
-; CHECK-NEXT: and x10, x9, #0x2000000000000000
-; CHECK-NEXT: and x9, x9, #0x4000000000000000
-; CHECK-NEXT: mul x10, x8, x10
-; CHECK-NEXT: mov x11, v1.d[1]
-; CHECK-NEXT: mul x8, x8, x9
-; CHECK-NEXT: str x10, [sp, #408] // 8-byte Spill
-; CHECK-NEXT: mov x10, v0.d[1]
-; CHECK-NEXT: str x8, [sp, #392] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x2
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #296] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x1
-; CHECK-NEXT: mul x9, x10, x8
-; CHECK-NEXT: and x8, x11, #0x4
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #224] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x8
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #168] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x10
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #272] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x20
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #216] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x40
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: stp x8, x9, [sp, #248] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x11, #0x80
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #136] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x100
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #88] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x200
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Reload
-; CHECK-NEXT: str x8, [sp, #104] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x400
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #160] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x800
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #264] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x1000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #208] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x2000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #240] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x4000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #288] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x8000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #304] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x10000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #48] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x20000
-; CHECK-NEXT: mul x26, x10, x8
-; CHECK-NEXT: and x8, x11, #0x40000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x80000
-; CHECK-NEXT: mul x9, x10, x8
-; CHECK-NEXT: and x8, x11, #0x100000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: stp x9, x8, [sp, #72] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x11, #0x200000
-; CHECK-NEXT: mul x9, x10, x8
-; CHECK-NEXT: and x8, x11, #0x400000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #184] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x800000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #128] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x1000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #120] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x2000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: stp x8, x9, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x11, #0x4000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #200] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x8000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #232] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x10000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #280] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x20000000
-; CHECK-NEXT: mul x20, x10, x8
-; CHECK-NEXT: and x8, x11, #0x40000000
-; CHECK-NEXT: mul x15, x10, x8
-; CHECK-NEXT: and x8, x11, #0x80000000
-; CHECK-NEXT: mul x19, x10, x8
-; CHECK-NEXT: and x8, x11, #0x100000000
-; CHECK-NEXT: mul x22, x10, x8
-; CHECK-NEXT: and x8, x11, #0x200000000
-; CHECK-NEXT: eor x15, x20, x15
-; CHECK-NEXT: mul x25, x10, x8
-; CHECK-NEXT: and x8, x11, #0x400000000
-; CHECK-NEXT: eor x15, x15, x19
-; CHECK-NEXT: mul x29, x10, x8
-; CHECK-NEXT: and x8, x11, #0x800000000
-; CHECK-NEXT: eor x15, x15, x22
-; CHECK-NEXT: mul x9, x10, x8
-; CHECK-NEXT: and x8, x11, #0x1000000000
-; CHECK-NEXT: eor x15, x15, x25
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: eor x15, x15, x29
-; CHECK-NEXT: str x8, [sp, #64] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x2000000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: stp x8, x9, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x11, #0x4000000000
-; CHECK-NEXT: and x9, x11, #0x400000000000000
-; CHECK-NEXT: mul x27, x10, x8
-; CHECK-NEXT: and x8, x11, #0x8000000000
-; CHECK-NEXT: mul x30, x10, x8
-; CHECK-NEXT: and x8, x11, #0x10000000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: mul x13, x10, x9
-; CHECK-NEXT: rbit x9, x10
-; CHECK-NEXT: str x8, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x20000000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #56] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x40000000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #96] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x80000000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #112] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x100000000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #176] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x200000000000
-; CHECK-NEXT: mul x8, x10, x8
-; CHECK-NEXT: str x8, [sp, #192] // 8-byte Spill
-; CHECK-NEXT: and x8, x11, #0x400000000000
-; CHECK-NEXT: mul x17, x10, x8
-; CHECK-NEXT: and x8, x11, #0x800000000000
-; CHECK-NEXT: mul x12, x10, x8
-; CHECK-NEXT: and x8, x11, #0x1000000000000
-; CHECK-NEXT: mul x14, x10, x8
-; CHECK-NEXT: and x8, x11, #0x2000000000000
-; CHECK-NEXT: mul x18, x10, x8
-; CHECK-NEXT: and x8, x11, #0x4000000000000
-; CHECK-NEXT: eor x12, x17, x12
-; CHECK-NEXT: mul x1, x10, x8
-; CHECK-NEXT: and x8, x11, #0x8000000000000
-; CHECK-NEXT: eor x12, x12, x14
-; CHECK-NEXT: mul x3, x10, x8
-; CHECK-NEXT: and x8, x11, #0x10000000000000
-; CHECK-NEXT: eor x12, x12, x18
-; CHECK-NEXT: mul x6, x10, x8
-; CHECK-NEXT: and x8, x11, #0x20000000000000
-; CHECK-NEXT: eor x12, x12, x1
-; CHECK-NEXT: mul x21, x10, x8
-; CHECK-NEXT: and x8, x11, #0x40000000000000
-; CHECK-NEXT: eor x12, x12, x3
-; CHECK-NEXT: mul x23, x10, x8
-; CHECK-NEXT: and x8, x11, #0x100000000000000
-; CHECK-NEXT: eor x12, x12, x6
-; CHECK-NEXT: mul x4, x10, x8
-; CHECK-NEXT: and x8, x11, #0x200000000000000
-; CHECK-NEXT: eor x12, x12, x21
-; CHECK-NEXT: mul x2, x10, x8
-; CHECK-NEXT: rbit x8, x11
-; CHECK-NEXT: eor x12, x12, x23
-; CHECK-NEXT: and x16, x8, #0x2
-; CHECK-NEXT: and x14, x8, #0x800000000
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: mul x14, x9, x14
-; CHECK-NEXT: str x16, [sp, #384] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x1
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #376] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x4
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #368] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x8
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #360] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x10
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #352] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x20
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #336] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x40
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #344] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x80
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #328] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x100
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #320] // 8-byte Spill
-; CHECK-NEXT: and x16, x8, #0x200
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: str x16, [sp, #312] // 8-byte Spill
-; CHECK-NEXT: eor x16, x0, x5
-; CHECK-NEXT: ldr x0, [sp, #1368] // 8-byte Reload
-; CHECK-NEXT: ldr x5, [sp, #1360] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: and x5, x8, #0x400
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #1344] // 8-byte Reload
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #1320] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #1304] // 8-byte Reload
-; CHECK-NEXT: eor x0, x7, x0
-; CHECK-NEXT: ldr x7, [sp, #1336] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1360] // 8-byte Spill
-; CHECK-NEXT: and x5, x8, #0x800
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: ldr x7, [sp, #1312] // 8-byte Reload
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #1296] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #1248] // 8-byte Reload
-; CHECK-NEXT: eor x0, x7, x0
-; CHECK-NEXT: ldr x7, [sp, #1288] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1352] // 8-byte Spill
-; CHECK-NEXT: and x5, x8, #0x1000
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: ldr x7, [sp, #1272] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #1256] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1336] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #1280] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: ldr x5, [sp, #1240] // 8-byte Reload
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #1264] // 8-byte Reload
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: and x7, x8, #0x2000
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: mul x5, x9, x7
-; CHECK-NEXT: ldr x7, [sp, #1176] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1328] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #1232] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: ldr x5, [sp, #1216] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x24
-; CHECK-NEXT: ldr x24, [sp, #1208] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x7
-; CHECK-NEXT: and x7, x8, #0x4000
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #1200] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x24
-; CHECK-NEXT: mul x7, x9, x7
-; CHECK-NEXT: ldr x24, [sp, #1120] // 8-byte Reload
-; CHECK-NEXT: str x14, [sp, #1120] // 8-byte Spill
-; CHECK-NEXT: ldr x14, [sp, #112] // 8-byte Reload
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: and x5, x8, #0x8000
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: str x7, [sp, #1320] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #1192] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #1168] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1312] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #1184] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #1224] // 8-byte Reload
-; CHECK-NEXT: str x12, [sp, #1184] // 8-byte Spill
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: ldr x5, [sp, #1112] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x24
-; CHECK-NEXT: ldr x24, [sp, #1160] // 8-byte Reload
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: and x7, x8, #0x10000
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: mul x7, x9, x7
-; CHECK-NEXT: ldr x0, [sp, #1152] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x24
-; CHECK-NEXT: ldr x24, [sp, #1048] // 8-byte Reload
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: and x5, x8, #0x20000
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: str x7, [sp, #1304] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #1136] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #1104] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1296] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #1128] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #1144] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: ldr x5, [sp, #1040] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x24
-; CHECK-NEXT: ldr x24, [sp, #1080] // 8-byte Reload
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #1072] // 8-byte Reload
-; CHECK-NEXT: and x7, x8, #0x40000
-; CHECK-NEXT: eor x5, x5, x24
-; CHECK-NEXT: ldr x24, [sp, #968] // 8-byte Reload
-; CHECK-NEXT: mul x7, x9, x7
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: and x5, x8, #0x80000
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: str x7, [sp, #1288] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #1064] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1280] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #1056] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #1008] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: ldr x5, [sp, #960] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #1096] // 8-byte Reload
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: and x7, x8, #0x100000
-; CHECK-NEXT: mul x7, x9, x7
-; CHECK-NEXT: eor x0, x0, x24
-; CHECK-NEXT: ldr x24, [sp, #992] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x24
-; CHECK-NEXT: ldr x24, [sp, #1032] // 8-byte Reload
-; CHECK-NEXT: str x7, [sp, #1272] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #1088] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #984] // 8-byte Reload
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #1000] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x7
-; CHECK-NEXT: and x7, x8, #0x200000
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: mul x5, x9, x7
-; CHECK-NEXT: ldr x7, [sp, #904] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1264] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #976] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: ldr x5, [sp, #944] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x24
-; CHECK-NEXT: ldr x24, [sp, #936] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x7
-; CHECK-NEXT: and x7, x8, #0x400000
-; CHECK-NEXT: mul x7, x9, x7
-; CHECK-NEXT: eor x5, x5, x24
-; CHECK-NEXT: ldr x24, [sp, #1016] // 8-byte Reload
-; CHECK-NEXT: str x7, [sp, #1256] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #1024] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #928] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x24
-; CHECK-NEXT: ldr x24, [sp, #920] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x7
-; CHECK-NEXT: and x7, x8, #0x800000
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: mul x7, x9, x7
-; CHECK-NEXT: and x0, x8, #0x1000000
-; CHECK-NEXT: str x16, [sp, #1368] // 8-byte Spill
-; CHECK-NEXT: ldr x16, [sp, #912] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x24
-; CHECK-NEXT: ldr x24, [sp, #168] // 8-byte Reload
-; CHECK-NEXT: mul x0, x9, x0
-; CHECK-NEXT: eor x16, x5, x16
-; CHECK-NEXT: ldr x5, [sp, #296] // 8-byte Reload
-; CHECK-NEXT: str x7, [sp, #1240] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #256] // 8-byte Reload
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: ldr x7, [sp, #224] // 8-byte Reload
-; CHECK-NEXT: str x0, [sp, #1232] // 8-byte Spill
-; CHECK-NEXT: ldr x0, [sp, #952] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #216] // 8-byte Reload
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: eor x0, x5, x7
-; CHECK-NEXT: and x5, x8, #0x2000000
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: ldr x7, [sp, #272] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x24
-; CHECK-NEXT: ldr x24, [sp, #136] // 8-byte Reload
-; CHECK-NEXT: eor x24, x24, x28
-; CHECK-NEXT: str x5, [sp, #1224] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #248] // 8-byte Reload
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: ldr x7, [sp, #104] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: ldr x5, [sp, #160] // 8-byte Reload
-; CHECK-NEXT: eor x7, x24, x7
-; CHECK-NEXT: and x24, x8, #0x4000000
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: mul x7, x9, x24
-; CHECK-NEXT: ldr x24, [sp, #288] // 8-byte Reload
-; CHECK-NEXT: str x7, [sp, #1216] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #448] // 8-byte Reload
-; CHECK-NEXT: eor x16, x16, x7
-; CHECK-NEXT: ldr x7, [sp, #208] // 8-byte Reload
-; CHECK-NEXT: str x16, [sp, #1344] // 8-byte Spill
-; CHECK-NEXT: eor x16, x0, x5
-; CHECK-NEXT: and x0, x8, #0x8000000
-; CHECK-NEXT: mul x0, x9, x0
-; CHECK-NEXT: ldr x5, [sp, #264] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x7
-; CHECK-NEXT: ldr x7, [sp, #48] // 8-byte Reload
-; CHECK-NEXT: eor x7, x7, x26
-; CHECK-NEXT: str x0, [sp, #1208] // 8-byte Spill
-; CHECK-NEXT: ldr x0, [sp, #240] // 8-byte Reload
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: ldr x5, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x24
-; CHECK-NEXT: ldr x24, [sp, #72] // 8-byte Reload
-; CHECK-NEXT: eor x5, x7, x5
-; CHECK-NEXT: and x7, x8, #0x10000000
-; CHECK-NEXT: mul x7, x9, x7
-; CHECK-NEXT: eor x5, x5, x24
-; CHECK-NEXT: str x7, [sp, #1192] // 8-byte Spill
-; CHECK-NEXT: ldr x7, [sp, #304] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #80] // 8-byte Reload
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #152] // 8-byte Reload
-; CHECK-NEXT: eor x5, x5, x7
-; CHECK-NEXT: and x7, x8, #0x20000000
-; CHECK-NEXT: eor x0, x5, x0
-; CHECK-NEXT: mul x5, x9, x7
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #184] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1176] // 8-byte Spill
-; CHECK-NEXT: ldp x7, x5, [sp, #120] // 16-byte Folded Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: and x5, x8, #0x40000000
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #200] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1160] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #144] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: and x5, x8, #0x80000000
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: ldr x7, [sp, #280] // 8-byte Reload
-; CHECK-NEXT: str x5, [sp, #1152] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #232] // 8-byte Reload
-; CHECK-NEXT: eor x0, x0, x5
-; CHECK-NEXT: and x5, x8, #0x100000000
-; CHECK-NEXT: mul x5, x9, x5
-; CHECK-NEXT: eor x0, x0, x7
-; CHECK-NEXT: eor x16, x16, x0
-; CHECK-NEXT: ldr x0, [sp, #40] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x0
-; CHECK-NEXT: and x0, x8, #0x200000000
-; CHECK-NEXT: mul x0, x9, x0
-; CHECK-NEXT: str x5, [sp, #1144] // 8-byte Spill
-; CHECK-NEXT: ldr x5, [sp, #64] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x5
-; CHECK-NEXT: ldr x5, [sp, #32] // 8-byte Reload
-; CHECK-NEXT: eor x15, x16, x15
-; CHECK-NEXT: and x16, x8, #0x400000000
-; CHECK-NEXT: mul x16, x9, x16
-; CHECK-NEXT: eor x5, x5, x27
-; CHECK-NEXT: str x0, [sp, #1136] // 8-byte Spill
-; CHECK-NEXT: ldr x0, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: str x15, [sp, #1248] // 8-byte Spill
-; CHECK-NEXT: eor x15, x5, x30
-; CHECK-NEXT: eor x15, x15, x0
-; CHECK-NEXT: str x16, [sp, #1128] // 8-byte Spill
-; CHECK-NEXT: ldr x16, [sp, #56] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #96] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #176] // 8-byte Reload
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: and x15, x8, #0x1000000000
-; CHECK-NEXT: mul x15, x9, x15
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: ldr x16, [sp, #832] // 8-byte Reload
-; CHECK-NEXT: str x15, [sp, #1112] // 8-byte Spill
-; CHECK-NEXT: ldr x15, [sp, #192] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: eor x15, x4, x2
-; CHECK-NEXT: eor x12, x15, x13
-; CHECK-NEXT: str x14, [sp, #1200] // 8-byte Spill
-; CHECK-NEXT: and x14, x8, #0x2000000000
-; CHECK-NEXT: str x12, [sp, #1168] // 8-byte Spill
-; CHECK-NEXT: and x12, x8, #0x4000000000
-; CHECK-NEXT: mul x28, x9, x14
-; CHECK-NEXT: ldr x13, [sp, #896] // 8-byte Reload
-; CHECK-NEXT: ldr x14, [sp, #888] // 8-byte Reload
-; CHECK-NEXT: ldr x15, [sp, #872] // 8-byte Reload
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: ldr x14, [sp, #880] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: ldr x15, [sp, #848] // 8-byte Reload
-; CHECK-NEXT: str x12, [sp, #1104] // 8-byte Spill
-; CHECK-NEXT: eor x12, x13, x14
-; CHECK-NEXT: ldr x13, [sp, #864] // 8-byte Reload
-; CHECK-NEXT: ldr x14, [sp, #856] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: and x14, x8, #0x8000000000
-; CHECK-NEXT: mul x14, x9, x14
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #840] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #824] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #800] // 8-byte Reload
-; CHECK-NEXT: eor x13, x15, x13
-; CHECK-NEXT: ldr x15, [sp, #816] // 8-byte Reload
-; CHECK-NEXT: str x14, [sp, #1096] // 8-byte Spill
-; CHECK-NEXT: and x14, x8, #0x10000000000
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: mul x14, x9, x14
-; CHECK-NEXT: ldr x15, [sp, #808] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #792] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #744] // 8-byte Reload
-; CHECK-NEXT: eor x13, x15, x13
-; CHECK-NEXT: ldr x15, [sp, #784] // 8-byte Reload
-; CHECK-NEXT: str x14, [sp, #1088] // 8-byte Spill
-; CHECK-NEXT: and x14, x8, #0x20000000000
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: mul x26, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #776] // 8-byte Reload
-; CHECK-NEXT: ldr x15, [sp, #768] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #736] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #760] // 8-byte Reload
-; CHECK-NEXT: ldr x16, [sp, #752] // 8-byte Reload
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: and x15, x8, #0x40000000000
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: ldr x14, [sp, #728] // 8-byte Reload
-; CHECK-NEXT: mul x25, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #672] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #720] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #704] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x80000000000
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #696] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: mul x27, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #688] // 8-byte Reload
-; CHECK-NEXT: ldr x16, [sp, #616] // 8-byte Reload
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: and x14, x8, #0x100000000000
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #664] // 8-byte Reload
-; CHECK-NEXT: mul x29, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #680] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #712] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #608] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #632] // 8-byte Reload
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: and x15, x8, #0x200000000000
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #624] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: mul x30, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #656] // 8-byte Reload
-; CHECK-NEXT: ldr x16, [sp, #576] // 8-byte Reload
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: and x14, x8, #0x400000000000
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #600] // 8-byte Reload
-; CHECK-NEXT: mul x24, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #648] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #640] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #568] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #560] // 8-byte Reload
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #552] // 8-byte Reload
-; CHECK-NEXT: and x15, x8, #0x800000000000
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: ldr x16, [sp, #464] // 8-byte Reload
-; CHECK-NEXT: mul x23, x9, x15
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: and x14, x8, #0x1000000000000
-; CHECK-NEXT: ldr x15, [sp, #544] // 8-byte Reload
-; CHECK-NEXT: mul x22, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #536] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #520] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #456] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #592] // 8-byte Reload
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: and x15, x8, #0x2000000000000
-; CHECK-NEXT: mul x20, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #584] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #488] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #480] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldp x16, x13, [sp, #504] // 16-byte Folded Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x4000000000000
-; CHECK-NEXT: mul x7, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #400] // 8-byte Reload
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: ldr x14, [sp, #472] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #440] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #424] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: and x15, x8, #0x8000000000000
-; CHECK-NEXT: mul x6, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #496] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: ldr x16, [sp, #528] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: ldr x15, [sp, #416] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #432] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x15
-; CHECK-NEXT: eor x21, x12, x13
-; CHECK-NEXT: ldr x12, [sp, #408] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x16
-; CHECK-NEXT: and x13, x8, #0x20000000000000
-; CHECK-NEXT: and x15, x8, #0x10000000000000
-; CHECK-NEXT: eor x12, x14, x12
-; CHECK-NEXT: ldr x14, [sp, #392] // 8-byte Reload
-; CHECK-NEXT: mul x4, x9, x13
-; CHECK-NEXT: ldr x16, [sp, #1336] // 8-byte Reload
-; CHECK-NEXT: eor x19, x12, x14
-; CHECK-NEXT: ldp x14, x12, [sp, #376] // 16-byte Folded Reload
-; CHECK-NEXT: mul x5, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #336] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x12
-; CHECK-NEXT: ldp x13, x12, [sp, #360] // 16-byte Folded Reload
-; CHECK-NEXT: eor x13, x12, x13
-; CHECK-NEXT: ldr x12, [sp, #352] // 8-byte Reload
-; CHECK-NEXT: eor x14, x14, x13
-; CHECK-NEXT: ldr x13, [sp, #344] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x15
-; CHECK-NEXT: and x15, x8, #0x40000000000000
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: mul x3, x9, x15
-; CHECK-NEXT: and x15, x8, #0x80000000000000
-; CHECK-NEXT: eor x14, x14, x12
-; CHECK-NEXT: ldp x13, x12, [sp, #320] // 16-byte Folded Reload
-; CHECK-NEXT: mul x2, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #1360] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #312] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #1352] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x15
-; CHECK-NEXT: ldr x15, [sp, #1328] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: eor x12, x14, x12
-; CHECK-NEXT: ldr x14, [sp, #1320] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: and x15, x8, #0x100000000000000
-; CHECK-NEXT: ldr x16, [sp, #1272] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #1312] // 8-byte Reload
-; CHECK-NEXT: mul x1, x9, x15
-; CHECK-NEXT: ldr x15, [sp, #1296] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #1304] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #1288] // 8-byte Reload
-; CHECK-NEXT: eor x15, x14, x15
-; CHECK-NEXT: and x14, x8, #0x200000000000000
-; CHECK-NEXT: mul x0, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #1280] // 8-byte Reload
-; CHECK-NEXT: eor x13, x15, x13
-; CHECK-NEXT: ldr x15, [sp, #1240] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #1256] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #1232] // 8-byte Reload
-; CHECK-NEXT: eor x15, x14, x15
-; CHECK-NEXT: and x14, x8, #0x400000000000000
-; CHECK-NEXT: mul x18, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #1264] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: ldr x16, [sp, #1192] // 8-byte Reload
-; CHECK-NEXT: eor x0, x1, x0
-; CHECK-NEXT: and x1, x11, #0x2000000000000000
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #1224] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: ldr x13, [sp, #1216] // 8-byte Reload
-; CHECK-NEXT: eor x15, x15, x14
-; CHECK-NEXT: and x14, x8, #0x800000000000000
-; CHECK-NEXT: mul x17, x9, x14
-; CHECK-NEXT: ldr x14, [sp, #1208] // 8-byte Reload
-; CHECK-NEXT: eor x13, x15, x13
-; CHECK-NEXT: ldr x15, [sp, #1160] // 8-byte Reload
-; CHECK-NEXT: eor x18, x0, x18
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: ldr x14, [sp, #1176] // 8-byte Reload
-; CHECK-NEXT: mul x0, x10, x1
-; CHECK-NEXT: eor x13, x13, x16
-; CHECK-NEXT: ldr x16, [sp, #1152] // 8-byte Reload
-; CHECK-NEXT: eor x15, x14, x15
-; CHECK-NEXT: and x14, x8, #0x1000000000000000
-; CHECK-NEXT: eor x15, x15, x16
-; CHECK-NEXT: mul x16, x9, x14
-; CHECK-NEXT: eor x14, x12, x13
-; CHECK-NEXT: ldr x12, [sp, #1144] // 8-byte Reload
-; CHECK-NEXT: eor x17, x18, x17
-; CHECK-NEXT: and x18, x11, #0x4000000000000000
-; CHECK-NEXT: eor x13, x15, x12
-; CHECK-NEXT: ldr x12, [sp, #1136] // 8-byte Reload
-; CHECK-NEXT: and x15, x8, #0x2000000000000000
-; CHECK-NEXT: and x8, x8, #0x4000000000000000
-; CHECK-NEXT: mul x15, x9, x15
-; CHECK-NEXT: eor x13, x13, x12
-; CHECK-NEXT: ldr x12, [sp, #1104] // 8-byte Reload
-; CHECK-NEXT: eor x16, x17, x16
-; CHECK-NEXT: mul x8, x9, x8
-; CHECK-NEXT: ldr x9, [sp, #1112] // 8-byte Reload
-; CHECK-NEXT: eor x12, x28, x12
-; CHECK-NEXT: ldr x28, [sp, #1128] // 8-byte Reload
-; CHECK-NEXT: mul x17, x10, x18
-; CHECK-NEXT: eor x13, x13, x28
-; CHECK-NEXT: ldr x28, [sp, #1096] // 8-byte Reload
-; CHECK-NEXT: eor x15, x16, x15
-; CHECK-NEXT: eor x12, x12, x28
-; CHECK-NEXT: ldr x28, [sp, #1120] // 8-byte Reload
-; CHECK-NEXT: eor x8, x15, x8
-; CHECK-NEXT: eor x13, x13, x28
-; CHECK-NEXT: ldr x28, [sp, #1088] // 8-byte Reload
-; CHECK-NEXT: eor x9, x13, x9
-; CHECK-NEXT: and x13, x11, #0x80000000000000
-; CHECK-NEXT: eor x12, x12, x28
-; CHECK-NEXT: eor x9, x14, x9
-; CHECK-NEXT: eor x14, x24, x23
-; CHECK-NEXT: eor x12, x12, x26
-; CHECK-NEXT: eor x14, x14, x22
-; CHECK-NEXT: and x23, x11, #0x800000000000000
-; CHECK-NEXT: eor x12, x12, x25
-; CHECK-NEXT: eor x14, x14, x20
-; CHECK-NEXT: mul x22, x10, x23
-; CHECK-NEXT: eor x12, x12, x27
-; CHECK-NEXT: and x20, x11, #0x1000000000000000
-; CHECK-NEXT: and x11, x11, #0x8000000000000000
-; CHECK-NEXT: eor x12, x12, x29
-; CHECK-NEXT: mul x13, x10, x13
-; CHECK-NEXT: eor x12, x12, x30
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: eor x12, x14, x7
-; CHECK-NEXT: mul x14, x10, x20
-; CHECK-NEXT: eor x12, x12, x6
-; CHECK-NEXT: eor x12, x12, x5
-; CHECK-NEXT: mul x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #1168] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x4
-; CHECK-NEXT: eor x12, x12, x3
-; CHECK-NEXT: eor x11, x11, x22
-; CHECK-NEXT: eor x12, x12, x2
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: eor x12, x21, x19
-; CHECK-NEXT: eor x8, x9, x8
-; CHECK-NEXT: eor x9, x11, x14
-; CHECK-NEXT: rbit x11, x12
-; CHECK-NEXT: ldr x12, [sp, #1248] // 8-byte Reload
-; CHECK-NEXT: ldr x14, [sp, #1200] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x0
-; CHECK-NEXT: rbit x8, x8
-; CHECK-NEXT: eor x9, x9, x17
-; CHECK-NEXT: lsr x1, x11, #1
-; CHECK-NEXT: eor x12, x12, x14
-; CHECK-NEXT: ldr x14, [sp, #1184] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #1368] // 8-byte Reload
-; CHECK-NEXT: ldr x11, [sp, #1344] // 8-byte Reload
-; CHECK-NEXT: lsr x3, x8, #1
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: eor x8, x12, x13
-; CHECK-NEXT: eor x0, x10, x11
-; CHECK-NEXT: eor x2, x8, x9
-; CHECK-NEXT: add sp, sp, #1376
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_v2i128_neon_zext:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: sub sp, sp, #1376
+; CHECK-NEON-NEXT: .cfi_def_cfa_offset 1472
+; CHECK-NEON-NEXT: .cfi_offset w19, -8
+; CHECK-NEON-NEXT: .cfi_offset w20, -16
+; CHECK-NEON-NEXT: .cfi_offset w21, -24
+; CHECK-NEON-NEXT: .cfi_offset w22, -32
+; CHECK-NEON-NEXT: .cfi_offset w23, -40
+; CHECK-NEON-NEXT: .cfi_offset w24, -48
+; CHECK-NEON-NEXT: .cfi_offset w25, -56
+; CHECK-NEON-NEXT: .cfi_offset w26, -64
+; CHECK-NEON-NEXT: .cfi_offset w27, -72
+; CHECK-NEON-NEXT: .cfi_offset w28, -80
+; CHECK-NEON-NEXT: .cfi_offset w30, -88
+; CHECK-NEON-NEXT: .cfi_offset w29, -96
+; CHECK-NEON-NEXT: fmov x9, d1
+; CHECK-NEON-NEXT: fmov x8, d0
+; CHECK-NEON-NEXT: and x10, x9, #0x2
+; CHECK-NEON-NEXT: mul x0, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x1
+; CHECK-NEON-NEXT: mul x5, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x4
+; CHECK-NEON-NEXT: mul x7, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x8
+; CHECK-NEON-NEXT: mul x24, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x10
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: eor x5, x7, x24
+; CHECK-NEON-NEXT: str x10, [sp, #1368] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1360] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1352] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: ldr x7, [sp, #1352] // 8-byte Reload
+; CHECK-NEON-NEXT: str x10, [sp, #1344] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1328] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: ldr x24, [sp, #1328] // 8-byte Reload
+; CHECK-NEON-NEXT: str x10, [sp, #1320] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1336] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1312] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1304] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1296] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1288] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1280] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1272] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1248] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1240] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1264] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1232] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1256] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1216] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1176] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1208] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1200] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1192] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1184] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1224] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1168] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1120] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1112] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1160] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1152] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1136] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1128] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1144] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1104] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1048] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1040] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1080] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1072] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1064] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1056] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1096] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1088] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1008] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #968] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #960] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #992] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #984] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1000] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #976] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1032] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1024] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #1016] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #944] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #904] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #936] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #928] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #920] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #912] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #952] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000000000000000
+; CHECK-NEON-NEXT: rbit x9, x9
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: rbit x8, x8
+; CHECK-NEON-NEXT: str x10, [sp, #448] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #896] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #888] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #880] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #872] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #864] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #856] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #848] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #840] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #832] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #824] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #816] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #808] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #800] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #792] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #784] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #776] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #768] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #744] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #736] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #760] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #728] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #752] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #720] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #672] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #704] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #696] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #688] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #680] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #712] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #664] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #616] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #608] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #632] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #624] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #656] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #648] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #640] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #600] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x4000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #576] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #568] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x10000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #560] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #552] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #544] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #536] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #592] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x200000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #584] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #520] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000000000
+; CHECK-NEON-NEXT: mul x11, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: stp x10, x11, [sp, #456] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000000000
+; CHECK-NEON-NEXT: mul x11, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x4000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: stp x10, x11, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x8000000000000
+; CHECK-NEON-NEXT: mul x11, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x10000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #472] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x20000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: stp x10, x11, [sp, #504] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x40000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #496] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x80000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #528] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x100000000000000
+; CHECK-NEON-NEXT: mul x11, x8, x10
+; CHECK-NEON-NEXT: and x10, x9, #0x200000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #400] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x400000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #424] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x800000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: str x10, [sp, #416] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x1000000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: stp x10, x11, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x10, x9, #0x2000000000000000
+; CHECK-NEON-NEXT: and x9, x9, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: mov x11, v1.d[1]
+; CHECK-NEON-NEXT: mul x8, x8, x9
+; CHECK-NEON-NEXT: str x10, [sp, #408] // 8-byte Spill
+; CHECK-NEON-NEXT: mov x10, v0.d[1]
+; CHECK-NEON-NEXT: str x8, [sp, #392] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x2
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x1
+; CHECK-NEON-NEXT: mul x9, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x4
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x8
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #168] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x10
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #272] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x20
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x40
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #248] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x80
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x100
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x200
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: ldr x28, [sp, #88] // 8-byte Reload
+; CHECK-NEON-NEXT: str x8, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x400
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #160] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x800
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x1000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #208] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x2000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #240] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x4000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #288] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x8000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #304] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x10000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x20000
+; CHECK-NEON-NEXT: mul x26, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x40000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x80000
+; CHECK-NEON-NEXT: mul x9, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x100000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: stp x9, x8, [sp, #72] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x200000
+; CHECK-NEON-NEXT: mul x9, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x400000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #184] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x800000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x1000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x2000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x4000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #200] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x8000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x10000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #280] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x20000000
+; CHECK-NEON-NEXT: mul x20, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x40000000
+; CHECK-NEON-NEXT: mul x15, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x80000000
+; CHECK-NEON-NEXT: mul x19, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x100000000
+; CHECK-NEON-NEXT: mul x22, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x200000000
+; CHECK-NEON-NEXT: eor x15, x20, x15
+; CHECK-NEON-NEXT: mul x25, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x400000000
+; CHECK-NEON-NEXT: eor x15, x15, x19
+; CHECK-NEON-NEXT: mul x29, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x800000000
+; CHECK-NEON-NEXT: eor x15, x15, x22
+; CHECK-NEON-NEXT: mul x9, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x1000000000
+; CHECK-NEON-NEXT: eor x15, x15, x25
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: eor x15, x15, x29
+; CHECK-NEON-NEXT: str x8, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x2000000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x4000000000
+; CHECK-NEON-NEXT: and x9, x11, #0x400000000000000
+; CHECK-NEON-NEXT: mul x27, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x8000000000
+; CHECK-NEON-NEXT: mul x30, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x10000000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: mul x13, x10, x9
+; CHECK-NEON-NEXT: rbit x9, x10
+; CHECK-NEON-NEXT: str x8, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x20000000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x40000000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x80000000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x100000000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #176] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x200000000000
+; CHECK-NEON-NEXT: mul x8, x10, x8
+; CHECK-NEON-NEXT: str x8, [sp, #192] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x11, #0x400000000000
+; CHECK-NEON-NEXT: mul x17, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x800000000000
+; CHECK-NEON-NEXT: mul x12, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x1000000000000
+; CHECK-NEON-NEXT: mul x14, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x2000000000000
+; CHECK-NEON-NEXT: mul x18, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x4000000000000
+; CHECK-NEON-NEXT: eor x12, x17, x12
+; CHECK-NEON-NEXT: mul x1, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x8000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x14
+; CHECK-NEON-NEXT: mul x3, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x10000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x18
+; CHECK-NEON-NEXT: mul x6, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x20000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x1
+; CHECK-NEON-NEXT: mul x21, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x40000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x3
+; CHECK-NEON-NEXT: mul x23, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x100000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x6
+; CHECK-NEON-NEXT: mul x4, x10, x8
+; CHECK-NEON-NEXT: and x8, x11, #0x200000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x21
+; CHECK-NEON-NEXT: mul x2, x10, x8
+; CHECK-NEON-NEXT: rbit x8, x11
+; CHECK-NEON-NEXT: eor x12, x12, x23
+; CHECK-NEON-NEXT: and x16, x8, #0x2
+; CHECK-NEON-NEXT: and x14, x8, #0x800000000
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: mul x14, x9, x14
+; CHECK-NEON-NEXT: str x16, [sp, #384] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x1
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #376] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x4
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #368] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x8
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #360] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x10
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #352] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x20
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #336] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x40
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #344] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x80
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #328] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x100
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #320] // 8-byte Spill
+; CHECK-NEON-NEXT: and x16, x8, #0x200
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: str x16, [sp, #312] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x16, x0, x5
+; CHECK-NEON-NEXT: ldr x0, [sp, #1368] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x5, [sp, #1360] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: and x5, x8, #0x400
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #1344] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #1320] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1304] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x7, x0
+; CHECK-NEON-NEXT: ldr x7, [sp, #1336] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1360] // 8-byte Spill
+; CHECK-NEON-NEXT: and x5, x8, #0x800
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: ldr x7, [sp, #1312] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #1296] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1248] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x7, x0
+; CHECK-NEON-NEXT: ldr x7, [sp, #1288] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1352] // 8-byte Spill
+; CHECK-NEON-NEXT: and x5, x8, #0x1000
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: ldr x7, [sp, #1272] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1256] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1336] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #1280] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #1240] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #1264] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: and x7, x8, #0x2000
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: mul x5, x9, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #1176] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1328] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #1232] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #1216] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1208] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x7
+; CHECK-NEON-NEXT: and x7, x8, #0x4000
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #1200] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x24
+; CHECK-NEON-NEXT: mul x7, x9, x7
+; CHECK-NEON-NEXT: ldr x24, [sp, #1120] // 8-byte Reload
+; CHECK-NEON-NEXT: str x14, [sp, #1120] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x14, [sp, #112] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: and x5, x8, #0x8000
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: str x7, [sp, #1320] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #1192] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #1168] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1312] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #1184] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1224] // 8-byte Reload
+; CHECK-NEON-NEXT: str x12, [sp, #1184] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #1112] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1160] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: and x7, x8, #0x10000
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: mul x7, x9, x7
+; CHECK-NEON-NEXT: ldr x0, [sp, #1152] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1048] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: and x5, x8, #0x20000
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: str x7, [sp, #1304] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #1136] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #1104] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1296] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #1128] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1144] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #1040] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1080] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #1072] // 8-byte Reload
+; CHECK-NEON-NEXT: and x7, x8, #0x40000
+; CHECK-NEON-NEXT: eor x5, x5, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #968] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x7, x9, x7
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: and x5, x8, #0x80000
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: str x7, [sp, #1288] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #1064] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1280] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #1056] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #1008] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #960] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1096] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: and x7, x8, #0x100000
+; CHECK-NEON-NEXT: mul x7, x9, x7
+; CHECK-NEON-NEXT: eor x0, x0, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #992] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1032] // 8-byte Reload
+; CHECK-NEON-NEXT: str x7, [sp, #1272] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #1088] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #984] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #1000] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x7
+; CHECK-NEON-NEXT: and x7, x8, #0x200000
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: mul x5, x9, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #904] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1264] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #976] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #944] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #936] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x7
+; CHECK-NEON-NEXT: and x7, x8, #0x400000
+; CHECK-NEON-NEXT: mul x7, x9, x7
+; CHECK-NEON-NEXT: eor x5, x5, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #1016] // 8-byte Reload
+; CHECK-NEON-NEXT: str x7, [sp, #1256] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #1024] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #928] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #920] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x7
+; CHECK-NEON-NEXT: and x7, x8, #0x800000
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: mul x7, x9, x7
+; CHECK-NEON-NEXT: and x0, x8, #0x1000000
+; CHECK-NEON-NEXT: str x16, [sp, #1368] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x16, [sp, #912] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #168] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x0, x9, x0
+; CHECK-NEON-NEXT: eor x16, x5, x16
+; CHECK-NEON-NEXT: ldr x5, [sp, #296] // 8-byte Reload
+; CHECK-NEON-NEXT: str x7, [sp, #1240] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: ldr x7, [sp, #224] // 8-byte Reload
+; CHECK-NEON-NEXT: str x0, [sp, #1232] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x0, [sp, #952] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #216] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: eor x0, x5, x7
+; CHECK-NEON-NEXT: and x5, x8, #0x2000000
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: ldr x7, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x24, x24, x28
+; CHECK-NEON-NEXT: str x5, [sp, #1224] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #248] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: ldr x7, [sp, #104] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x24, x7
+; CHECK-NEON-NEXT: and x24, x8, #0x4000000
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: mul x7, x9, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT: str x7, [sp, #1216] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #448] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x16, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #208] // 8-byte Reload
+; CHECK-NEON-NEXT: str x16, [sp, #1344] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x16, x0, x5
+; CHECK-NEON-NEXT: and x0, x8, #0x8000000
+; CHECK-NEON-NEXT: mul x0, x9, x0
+; CHECK-NEON-NEXT: ldr x5, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x7, x7, x26
+; CHECK-NEON-NEXT: str x0, [sp, #1208] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x0, [sp, #240] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: ldr x5, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x7, x5
+; CHECK-NEON-NEXT: and x7, x8, #0x10000000
+; CHECK-NEON-NEXT: mul x7, x9, x7
+; CHECK-NEON-NEXT: eor x5, x5, x24
+; CHECK-NEON-NEXT: str x7, [sp, #1192] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x7, [sp, #304] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #80] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x5, x5, x7
+; CHECK-NEON-NEXT: and x7, x8, #0x20000000
+; CHECK-NEON-NEXT: eor x0, x5, x0
+; CHECK-NEON-NEXT: mul x5, x9, x7
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #184] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1176] // 8-byte Spill
+; CHECK-NEON-NEXT: ldp x7, x5, [sp, #120] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: and x5, x8, #0x40000000
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #200] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1160] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: and x5, x8, #0x80000000
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: ldr x7, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT: str x5, [sp, #1152] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #232] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x0, x5
+; CHECK-NEON-NEXT: and x5, x8, #0x100000000
+; CHECK-NEON-NEXT: mul x5, x9, x5
+; CHECK-NEON-NEXT: eor x0, x0, x7
+; CHECK-NEON-NEXT: eor x16, x16, x0
+; CHECK-NEON-NEXT: ldr x0, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x0
+; CHECK-NEON-NEXT: and x0, x8, #0x200000000
+; CHECK-NEON-NEXT: mul x0, x9, x0
+; CHECK-NEON-NEXT: str x5, [sp, #1144] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x5, [sp, #64] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x5
+; CHECK-NEON-NEXT: ldr x5, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x16, x15
+; CHECK-NEON-NEXT: and x16, x8, #0x400000000
+; CHECK-NEON-NEXT: mul x16, x9, x16
+; CHECK-NEON-NEXT: eor x5, x5, x27
+; CHECK-NEON-NEXT: str x0, [sp, #1136] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x0, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT: str x15, [sp, #1248] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x15, x5, x30
+; CHECK-NEON-NEXT: eor x15, x15, x0
+; CHECK-NEON-NEXT: str x16, [sp, #1128] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x16, [sp, #56] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #176] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: and x15, x8, #0x1000000000
+; CHECK-NEON-NEXT: mul x15, x9, x15
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #832] // 8-byte Reload
+; CHECK-NEON-NEXT: str x15, [sp, #1112] // 8-byte Spill
+; CHECK-NEON-NEXT: ldr x15, [sp, #192] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: eor x15, x4, x2
+; CHECK-NEON-NEXT: eor x12, x15, x13
+; CHECK-NEON-NEXT: str x14, [sp, #1200] // 8-byte Spill
+; CHECK-NEON-NEXT: and x14, x8, #0x2000000000
+; CHECK-NEON-NEXT: str x12, [sp, #1168] // 8-byte Spill
+; CHECK-NEON-NEXT: and x12, x8, #0x4000000000
+; CHECK-NEON-NEXT: mul x28, x9, x14
+; CHECK-NEON-NEXT: ldr x13, [sp, #896] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x14, [sp, #888] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x15, [sp, #872] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: ldr x14, [sp, #880] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #848] // 8-byte Reload
+; CHECK-NEON-NEXT: str x12, [sp, #1104] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x12, x13, x14
+; CHECK-NEON-NEXT: ldr x13, [sp, #864] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x14, [sp, #856] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: and x14, x8, #0x8000000000
+; CHECK-NEON-NEXT: mul x14, x9, x14
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #840] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #824] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #800] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x15, x13
+; CHECK-NEON-NEXT: ldr x15, [sp, #816] // 8-byte Reload
+; CHECK-NEON-NEXT: str x14, [sp, #1096] // 8-byte Spill
+; CHECK-NEON-NEXT: and x14, x8, #0x10000000000
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: mul x14, x9, x14
+; CHECK-NEON-NEXT: ldr x15, [sp, #808] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #792] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #744] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x15, x13
+; CHECK-NEON-NEXT: ldr x15, [sp, #784] // 8-byte Reload
+; CHECK-NEON-NEXT: str x14, [sp, #1088] // 8-byte Spill
+; CHECK-NEON-NEXT: and x14, x8, #0x20000000000
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: mul x26, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #776] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x15, [sp, #768] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #736] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #760] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x16, [sp, #752] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: and x15, x8, #0x40000000000
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: ldr x14, [sp, #728] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x25, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #672] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #720] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #704] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x80000000000
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #696] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: mul x27, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #688] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x16, [sp, #616] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: and x14, x8, #0x100000000000
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #664] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x29, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #680] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #712] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #608] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #632] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: and x15, x8, #0x200000000000
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #624] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: mul x30, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #656] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x16, [sp, #576] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: and x14, x8, #0x400000000000
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #600] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x24, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #648] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #640] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #568] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #560] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #552] // 8-byte Reload
+; CHECK-NEON-NEXT: and x15, x8, #0x800000000000
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #464] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x23, x9, x15
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: and x14, x8, #0x1000000000000
+; CHECK-NEON-NEXT: ldr x15, [sp, #544] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x22, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #536] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #520] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #456] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #592] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: and x15, x8, #0x2000000000000
+; CHECK-NEON-NEXT: mul x20, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #584] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #488] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #480] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldp x16, x13, [sp, #504] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x4000000000000
+; CHECK-NEON-NEXT: mul x7, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #400] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: ldr x14, [sp, #472] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #440] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #424] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x8000000000000
+; CHECK-NEON-NEXT: mul x6, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #496] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #528] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #416] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #432] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x15
+; CHECK-NEON-NEXT: eor x21, x12, x13
+; CHECK-NEON-NEXT: ldr x12, [sp, #408] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x16
+; CHECK-NEON-NEXT: and x13, x8, #0x20000000000000
+; CHECK-NEON-NEXT: and x15, x8, #0x10000000000000
+; CHECK-NEON-NEXT: eor x12, x14, x12
+; CHECK-NEON-NEXT: ldr x14, [sp, #392] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x4, x9, x13
+; CHECK-NEON-NEXT: ldr x16, [sp, #1336] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x19, x12, x14
+; CHECK-NEON-NEXT: ldp x14, x12, [sp, #376] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x5, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #336] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x12
+; CHECK-NEON-NEXT: ldp x13, x12, [sp, #360] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x13, x12, x13
+; CHECK-NEON-NEXT: ldr x12, [sp, #352] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x14, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #344] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x40000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: mul x3, x9, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x80000000000000
+; CHECK-NEON-NEXT: eor x14, x14, x12
+; CHECK-NEON-NEXT: ldp x13, x12, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x2, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #1360] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #1352] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #1328] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: eor x12, x14, x12
+; CHECK-NEON-NEXT: ldr x14, [sp, #1320] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x100000000000000
+; CHECK-NEON-NEXT: ldr x16, [sp, #1272] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1312] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x1, x9, x15
+; CHECK-NEON-NEXT: ldr x15, [sp, #1296] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1304] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #1288] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x14, x15
+; CHECK-NEON-NEXT: and x14, x8, #0x200000000000000
+; CHECK-NEON-NEXT: mul x0, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1280] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x15, x13
+; CHECK-NEON-NEXT: ldr x15, [sp, #1240] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1256] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #1232] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x14, x15
+; CHECK-NEON-NEXT: and x14, x8, #0x400000000000000
+; CHECK-NEON-NEXT: mul x18, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1264] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #1192] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x0, x1, x0
+; CHECK-NEON-NEXT: and x1, x11, #0x2000000000000000
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1224] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #1216] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x15, x14
+; CHECK-NEON-NEXT: and x14, x8, #0x800000000000000
+; CHECK-NEON-NEXT: mul x17, x9, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1208] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x15, x13
+; CHECK-NEON-NEXT: ldr x15, [sp, #1160] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x18, x0, x18
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1176] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x0, x10, x1
+; CHECK-NEON-NEXT: eor x13, x13, x16
+; CHECK-NEON-NEXT: ldr x16, [sp, #1152] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x14, x15
+; CHECK-NEON-NEXT: and x14, x8, #0x1000000000000000
+; CHECK-NEON-NEXT: eor x15, x15, x16
+; CHECK-NEON-NEXT: mul x16, x9, x14
+; CHECK-NEON-NEXT: eor x14, x12, x13
+; CHECK-NEON-NEXT: ldr x12, [sp, #1144] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x17, x18, x17
+; CHECK-NEON-NEXT: and x18, x11, #0x4000000000000000
+; CHECK-NEON-NEXT: eor x13, x15, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1136] // 8-byte Reload
+; CHECK-NEON-NEXT: and x15, x8, #0x2000000000000000
+; CHECK-NEON-NEXT: and x8, x8, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x15, x9, x15
+; CHECK-NEON-NEXT: eor x13, x13, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1104] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x17, x16
+; CHECK-NEON-NEXT: mul x8, x9, x8
+; CHECK-NEON-NEXT: ldr x9, [sp, #1112] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x28, x12
+; CHECK-NEON-NEXT: ldr x28, [sp, #1128] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x17, x10, x18
+; CHECK-NEON-NEXT: eor x13, x13, x28
+; CHECK-NEON-NEXT: ldr x28, [sp, #1096] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x16, x15
+; CHECK-NEON-NEXT: eor x12, x12, x28
+; CHECK-NEON-NEXT: ldr x28, [sp, #1120] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x15, x8
+; CHECK-NEON-NEXT: eor x13, x13, x28
+; CHECK-NEON-NEXT: ldr x28, [sp, #1088] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x13, x9
+; CHECK-NEON-NEXT: and x13, x11, #0x80000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x28
+; CHECK-NEON-NEXT: eor x9, x14, x9
+; CHECK-NEON-NEXT: eor x14, x24, x23
+; CHECK-NEON-NEXT: eor x12, x12, x26
+; CHECK-NEON-NEXT: eor x14, x14, x22
+; CHECK-NEON-NEXT: and x23, x11, #0x800000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x25
+; CHECK-NEON-NEXT: eor x14, x14, x20
+; CHECK-NEON-NEXT: mul x22, x10, x23
+; CHECK-NEON-NEXT: eor x12, x12, x27
+; CHECK-NEON-NEXT: and x20, x11, #0x1000000000000000
+; CHECK-NEON-NEXT: and x11, x11, #0x8000000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x29
+; CHECK-NEON-NEXT: mul x13, x10, x13
+; CHECK-NEON-NEXT: eor x12, x12, x30
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: eor x12, x14, x7
+; CHECK-NEON-NEXT: mul x14, x10, x20
+; CHECK-NEON-NEXT: eor x12, x12, x6
+; CHECK-NEON-NEXT: eor x12, x12, x5
+; CHECK-NEON-NEXT: mul x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #1168] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x4
+; CHECK-NEON-NEXT: eor x12, x12, x3
+; CHECK-NEON-NEXT: eor x11, x11, x22
+; CHECK-NEON-NEXT: eor x12, x12, x2
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: eor x12, x21, x19
+; CHECK-NEON-NEXT: eor x8, x9, x8
+; CHECK-NEON-NEXT: eor x9, x11, x14
+; CHECK-NEON-NEXT: rbit x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #1248] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x14, [sp, #1200] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x0
+; CHECK-NEON-NEXT: rbit x8, x8
+; CHECK-NEON-NEXT: eor x9, x9, x17
+; CHECK-NEON-NEXT: lsr x1, x11, #1
+; CHECK-NEON-NEXT: eor x12, x12, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #1184] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #1368] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x11, [sp, #1344] // 8-byte Reload
+; CHECK-NEON-NEXT: lsr x3, x8, #1
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: eor x8, x12, x13
+; CHECK-NEON-NEXT: eor x0, x10, x11
+; CHECK-NEON-NEXT: eor x2, x8, x9
+; CHECK-NEON-NEXT: add sp, sp, #1376
+; CHECK-NEON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_v2i128_neon_zext:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: fmov x10, d0
+; CHECK-AES-NEXT: fmov x11, d1
+; CHECK-AES-NEXT: mov x8, v0.d[1]
+; CHECK-AES-NEXT: mov x9, v1.d[1]
+; CHECK-AES-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-AES-NEXT: ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-AES-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT: rbit x11, x11
+; CHECK-AES-NEXT: rbit x10, x10
+; CHECK-AES-NEXT: rbit x8, x8
+; CHECK-AES-NEXT: fmov d4, x11
+; CHECK-AES-NEXT: fmov d5, x10
+; CHECK-AES-NEXT: rbit x9, x9
+; CHECK-AES-NEXT: fmov d6, x8
+; CHECK-AES-NEXT: fmov x0, d0
+; CHECK-AES-NEXT: pmull v1.1q, v3.1d, v2.1d
+; CHECK-AES-NEXT: pmull v4.1q, v5.1d, v4.1d
+; CHECK-AES-NEXT: fmov d5, x9
+; CHECK-AES-NEXT: pmull v5.1q, v6.1d, v5.1d
+; CHECK-AES-NEXT: fmov x2, d1
+; CHECK-AES-NEXT: fmov x8, d4
+; CHECK-AES-NEXT: fmov x9, d5
+; CHECK-AES-NEXT: rbit x8, x8
+; CHECK-AES-NEXT: lsr x1, x8, #1
+; CHECK-AES-NEXT: rbit x9, x9
+; CHECK-AES-NEXT: lsr x3, x9, #1
+; CHECK-AES-NEXT: ret
%zextx = zext <2 x i64> %x to <2 x i128>
%zexty = zext <2 x i64> %y to <2 x i128>
%a = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %zextx, <2 x i128> %zexty)
diff --git a/llvm/test/CodeGen/AArch64/clmul.ll b/llvm/test/CodeGen/AArch64/clmul.ll
index 75f418f5bfb42..7a94696711882 100644
--- a/llvm/test/CodeGen/AArch64/clmul.ll
+++ b/llvm/test/CodeGen/AArch64/clmul.ll
@@ -1,499 +1,515 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
+; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+aes | FileCheck %s --check-prefixes=CHECK,CHECK-AES
define i8 @clmul_i8(i8 %x, i8 %y) {
; CHECK-LABEL: clmul_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0x2
-; CHECK-NEXT: and w9, w1, #0x1
-; CHECK-NEXT: and w10, w1, #0x4
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: and w11, w1, #0x8
-; CHECK-NEXT: and w12, w1, #0x10
-; CHECK-NEXT: mul w9, w0, w9
-; CHECK-NEXT: and w13, w1, #0x20
-; CHECK-NEXT: and w14, w1, #0x40
-; CHECK-NEXT: mul w10, w0, w10
-; CHECK-NEXT: mul w11, w0, w11
-; CHECK-NEXT: mul w12, w0, w12
-; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: and w9, w1, #0xffffff80
-; CHECK-NEXT: mul w13, w0, w13
-; CHECK-NEXT: mul w14, w0, w14
-; CHECK-NEXT: eor w10, w10, w11
-; CHECK-NEXT: mul w9, w0, w9
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w11, w12, w13
-; CHECK-NEXT: eor w10, w11, w14
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w0, w8, w9
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: pmul v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%a = call i8 @llvm.clmul.i8(i8 %x, i8 %y)
ret i8 %a
}
define i16 @clmul_i16(i16 %x, i16 %y) {
-; CHECK-LABEL: clmul_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0x2
-; CHECK-NEXT: and w9, w1, #0x1
-; CHECK-NEXT: and w10, w1, #0x4
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: and w11, w1, #0x8
-; CHECK-NEXT: and w12, w1, #0x10
-; CHECK-NEXT: mul w9, w0, w9
-; CHECK-NEXT: and w13, w1, #0x20
-; CHECK-NEXT: and w15, w1, #0x80
-; CHECK-NEXT: mul w10, w0, w10
-; CHECK-NEXT: and w16, w1, #0x100
-; CHECK-NEXT: and w2, w1, #0x800
-; CHECK-NEXT: mul w11, w0, w11
-; CHECK-NEXT: and w14, w1, #0x40
-; CHECK-NEXT: and w17, w1, #0x200
-; CHECK-NEXT: mul w12, w0, w12
-; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: and w9, w1, #0x1000
-; CHECK-NEXT: mul w13, w0, w13
-; CHECK-NEXT: and w18, w1, #0x400
-; CHECK-NEXT: mul w15, w0, w15
-; CHECK-NEXT: eor w10, w10, w11
-; CHECK-NEXT: and w11, w1, #0x2000
-; CHECK-NEXT: mul w16, w0, w16
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: and w10, w1, #0x4000
-; CHECK-NEXT: mul w2, w0, w2
-; CHECK-NEXT: eor w12, w12, w13
-; CHECK-NEXT: and w13, w1, #0xffff8000
-; CHECK-NEXT: mul w9, w0, w9
-; CHECK-NEXT: mul w14, w0, w14
-; CHECK-NEXT: eor w15, w15, w16
-; CHECK-NEXT: mul w17, w0, w17
-; CHECK-NEXT: mul w11, w0, w11
-; CHECK-NEXT: eor w9, w2, w9
-; CHECK-NEXT: mul w18, w0, w18
-; CHECK-NEXT: eor w12, w12, w14
-; CHECK-NEXT: mul w10, w0, w10
-; CHECK-NEXT: eor w14, w15, w17
-; CHECK-NEXT: eor w8, w8, w12
-; CHECK-NEXT: mul w13, w0, w13
-; CHECK-NEXT: eor w9, w9, w11
-; CHECK-NEXT: eor w11, w14, w18
-; CHECK-NEXT: eor w9, w9, w10
-; CHECK-NEXT: eor w8, w8, w11
-; CHECK-NEXT: eor w9, w9, w13
-; CHECK-NEXT: eor w0, w8, w9
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_i16:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: and w8, w1, #0x2
+; CHECK-NEON-NEXT: and w9, w1, #0x1
+; CHECK-NEON-NEXT: and w10, w1, #0x4
+; CHECK-NEON-NEXT: mul w8, w0, w8
+; CHECK-NEON-NEXT: and w11, w1, #0x8
+; CHECK-NEON-NEXT: and w12, w1, #0x10
+; CHECK-NEON-NEXT: mul w9, w0, w9
+; CHECK-NEON-NEXT: and w13, w1, #0x20
+; CHECK-NEON-NEXT: and w15, w1, #0x80
+; CHECK-NEON-NEXT: mul w10, w0, w10
+; CHECK-NEON-NEXT: and w16, w1, #0x100
+; CHECK-NEON-NEXT: and w2, w1, #0x800
+; CHECK-NEON-NEXT: mul w11, w0, w11
+; CHECK-NEON-NEXT: and w14, w1, #0x40
+; CHECK-NEON-NEXT: and w17, w1, #0x200
+; CHECK-NEON-NEXT: mul w12, w0, w12
+; CHECK-NEON-NEXT: eor w8, w9, w8
+; CHECK-NEON-NEXT: and w9, w1, #0x1000
+; CHECK-NEON-NEXT: mul w13, w0, w13
+; CHECK-NEON-NEXT: and w18, w1, #0x400
+; CHECK-NEON-NEXT: mul w15, w0, w15
+; CHECK-NEON-NEXT: eor w10, w10, w11
+; CHECK-NEON-NEXT: and w11, w1, #0x2000
+; CHECK-NEON-NEXT: mul w16, w0, w16
+; CHECK-NEON-NEXT: eor w8, w8, w10
+; CHECK-NEON-NEXT: and w10, w1, #0x4000
+; CHECK-NEON-NEXT: mul w2, w0, w2
+; CHECK-NEON-NEXT: eor w12, w12, w13
+; CHECK-NEON-NEXT: and w13, w1, #0xffff8000
+; CHECK-NEON-NEXT: mul w9, w0, w9
+; CHECK-NEON-NEXT: mul w14, w0, w14
+; CHECK-NEON-NEXT: eor w15, w15, w16
+; CHECK-NEON-NEXT: mul w17, w0, w17
+; CHECK-NEON-NEXT: mul w11, w0, w11
+; CHECK-NEON-NEXT: eor w9, w2, w9
+; CHECK-NEON-NEXT: mul w18, w0, w18
+; CHECK-NEON-NEXT: eor w12, w12, w14
+; CHECK-NEON-NEXT: mul w10, w0, w10
+; CHECK-NEON-NEXT: eor w14, w15, w17
+; CHECK-NEON-NEXT: eor w8, w8, w12
+; CHECK-NEON-NEXT: mul w13, w0, w13
+; CHECK-NEON-NEXT: eor w9, w9, w11
+; CHECK-NEON-NEXT: eor w11, w14, w18
+; CHECK-NEON-NEXT: eor w9, w9, w10
+; CHECK-NEON-NEXT: eor w8, w8, w11
+; CHECK-NEON-NEXT: eor w9, w9, w13
+; CHECK-NEON-NEXT: eor w0, w8, w9
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_i16:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: fmov s0, w1
+; CHECK-AES-NEXT: fmov s1, w0
+; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: fmov w0, s0
+; CHECK-AES-NEXT: ret
%a = call i16 @llvm.clmul.i16(i16 %x, i16 %y)
ret i16 %a
}
define i32 @clmul_i32(i32 %x, i32 %y) {
-; CHECK-LABEL: clmul_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0x2
-; CHECK-NEXT: and w9, w1, #0x1
-; CHECK-NEXT: and w10, w1, #0x4
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: and w11, w1, #0x8
-; CHECK-NEXT: and w12, w1, #0x10
-; CHECK-NEXT: mul w9, w0, w9
-; CHECK-NEXT: and w13, w1, #0x20
-; CHECK-NEXT: and w14, w1, #0x40
-; CHECK-NEXT: mul w10, w0, w10
-; CHECK-NEXT: and w2, w1, #0x800
-; CHECK-NEXT: and w15, w1, #0x80
-; CHECK-NEXT: mul w11, w0, w11
-; CHECK-NEXT: and w16, w1, #0x100
-; CHECK-NEXT: and w17, w1, #0x200
-; CHECK-NEXT: mul w12, w0, w12
-; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: and w9, w1, #0x1000
-; CHECK-NEXT: mul w13, w0, w13
-; CHECK-NEXT: and w18, w1, #0x400
-; CHECK-NEXT: mul w14, w0, w14
-; CHECK-NEXT: eor w10, w10, w11
-; CHECK-NEXT: and w11, w1, #0x2000
-; CHECK-NEXT: mul w2, w0, w2
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: and w10, w1, #0x4000
-; CHECK-NEXT: mul w9, w0, w9
-; CHECK-NEXT: eor w12, w12, w13
-; CHECK-NEXT: and w13, w1, #0x8000
-; CHECK-NEXT: mul w15, w0, w15
-; CHECK-NEXT: eor w12, w12, w14
-; CHECK-NEXT: and w14, w1, #0x10000
-; CHECK-NEXT: mul w16, w0, w16
-; CHECK-NEXT: eor w8, w8, w12
-; CHECK-NEXT: and w12, w1, #0x20000
-; CHECK-NEXT: mul w11, w0, w11
-; CHECK-NEXT: eor w9, w2, w9
-; CHECK-NEXT: and w2, w1, #0x400000
-; CHECK-NEXT: mul w17, w0, w17
-; CHECK-NEXT: mul w10, w0, w10
-; CHECK-NEXT: eor w15, w15, w16
-; CHECK-NEXT: and w16, w1, #0x40000
-; CHECK-NEXT: mul w13, w0, w13
-; CHECK-NEXT: eor w9, w9, w11
-; CHECK-NEXT: and w11, w1, #0x800000
-; CHECK-NEXT: mul w18, w0, w18
-; CHECK-NEXT: eor w15, w15, w17
-; CHECK-NEXT: and w17, w1, #0x80000
-; CHECK-NEXT: mul w14, w0, w14
-; CHECK-NEXT: eor w9, w9, w10
-; CHECK-NEXT: and w10, w1, #0x1000000
-; CHECK-NEXT: mul w12, w0, w12
-; CHECK-NEXT: eor w9, w9, w13
-; CHECK-NEXT: and w13, w1, #0x2000000
-; CHECK-NEXT: mul w16, w0, w16
-; CHECK-NEXT: eor w15, w15, w18
-; CHECK-NEXT: and w18, w1, #0x100000
-; CHECK-NEXT: mul w2, w0, w2
-; CHECK-NEXT: eor w8, w8, w15
-; CHECK-NEXT: and w15, w1, #0x200000
-; CHECK-NEXT: mul w11, w0, w11
-; CHECK-NEXT: eor w12, w14, w12
-; CHECK-NEXT: and w14, w1, #0x4000000
-; CHECK-NEXT: mul w17, w0, w17
-; CHECK-NEXT: eor w12, w12, w16
-; CHECK-NEXT: and w16, w1, #0x8000000
-; CHECK-NEXT: mul w10, w0, w10
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: mul w13, w0, w13
-; CHECK-NEXT: eor w11, w2, w11
-; CHECK-NEXT: and w2, w1, #0x20000000
-; CHECK-NEXT: mul w18, w0, w18
-; CHECK-NEXT: eor w12, w12, w17
-; CHECK-NEXT: and w17, w1, #0x10000000
-; CHECK-NEXT: mul w14, w0, w14
-; CHECK-NEXT: eor w10, w11, w10
-; CHECK-NEXT: and w11, w1, #0x40000000
-; CHECK-NEXT: mul w15, w0, w15
-; CHECK-NEXT: eor w10, w10, w13
-; CHECK-NEXT: and w13, w1, #0x80000000
-; CHECK-NEXT: mul w16, w0, w16
-; CHECK-NEXT: eor w12, w12, w18
-; CHECK-NEXT: mul w17, w0, w17
-; CHECK-NEXT: eor w10, w10, w14
-; CHECK-NEXT: mul w2, w0, w2
-; CHECK-NEXT: eor w9, w12, w15
-; CHECK-NEXT: mul w11, w0, w11
-; CHECK-NEXT: eor w10, w10, w16
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: mul w13, w0, w13
-; CHECK-NEXT: eor w9, w10, w17
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: eor w10, w2, w11
-; CHECK-NEXT: eor w9, w10, w13
-; CHECK-NEXT: eor w0, w8, w9
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_i32:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: and w8, w1, #0x2
+; CHECK-NEON-NEXT: and w9, w1, #0x1
+; CHECK-NEON-NEXT: and w10, w1, #0x4
+; CHECK-NEON-NEXT: mul w8, w0, w8
+; CHECK-NEON-NEXT: and w11, w1, #0x8
+; CHECK-NEON-NEXT: and w12, w1, #0x10
+; CHECK-NEON-NEXT: mul w9, w0, w9
+; CHECK-NEON-NEXT: and w13, w1, #0x20
+; CHECK-NEON-NEXT: and w14, w1, #0x40
+; CHECK-NEON-NEXT: mul w10, w0, w10
+; CHECK-NEON-NEXT: and w2, w1, #0x800
+; CHECK-NEON-NEXT: and w15, w1, #0x80
+; CHECK-NEON-NEXT: mul w11, w0, w11
+; CHECK-NEON-NEXT: and w16, w1, #0x100
+; CHECK-NEON-NEXT: and w17, w1, #0x200
+; CHECK-NEON-NEXT: mul w12, w0, w12
+; CHECK-NEON-NEXT: eor w8, w9, w8
+; CHECK-NEON-NEXT: and w9, w1, #0x1000
+; CHECK-NEON-NEXT: mul w13, w0, w13
+; CHECK-NEON-NEXT: and w18, w1, #0x400
+; CHECK-NEON-NEXT: mul w14, w0, w14
+; CHECK-NEON-NEXT: eor w10, w10, w11
+; CHECK-NEON-NEXT: and w11, w1, #0x2000
+; CHECK-NEON-NEXT: mul w2, w0, w2
+; CHECK-NEON-NEXT: eor w8, w8, w10
+; CHECK-NEON-NEXT: and w10, w1, #0x4000
+; CHECK-NEON-NEXT: mul w9, w0, w9
+; CHECK-NEON-NEXT: eor w12, w12, w13
+; CHECK-NEON-NEXT: and w13, w1, #0x8000
+; CHECK-NEON-NEXT: mul w15, w0, w15
+; CHECK-NEON-NEXT: eor w12, w12, w14
+; CHECK-NEON-NEXT: and w14, w1, #0x10000
+; CHECK-NEON-NEXT: mul w16, w0, w16
+; CHECK-NEON-NEXT: eor w8, w8, w12
+; CHECK-NEON-NEXT: and w12, w1, #0x20000
+; CHECK-NEON-NEXT: mul w11, w0, w11
+; CHECK-NEON-NEXT: eor w9, w2, w9
+; CHECK-NEON-NEXT: and w2, w1, #0x400000
+; CHECK-NEON-NEXT: mul w17, w0, w17
+; CHECK-NEON-NEXT: mul w10, w0, w10
+; CHECK-NEON-NEXT: eor w15, w15, w16
+; CHECK-NEON-NEXT: and w16, w1, #0x40000
+; CHECK-NEON-NEXT: mul w13, w0, w13
+; CHECK-NEON-NEXT: eor w9, w9, w11
+; CHECK-NEON-NEXT: and w11, w1, #0x800000
+; CHECK-NEON-NEXT: mul w18, w0, w18
+; CHECK-NEON-NEXT: eor w15, w15, w17
+; CHECK-NEON-NEXT: and w17, w1, #0x80000
+; CHECK-NEON-NEXT: mul w14, w0, w14
+; CHECK-NEON-NEXT: eor w9, w9, w10
+; CHECK-NEON-NEXT: and w10, w1, #0x1000000
+; CHECK-NEON-NEXT: mul w12, w0, w12
+; CHECK-NEON-NEXT: eor w9, w9, w13
+; CHECK-NEON-NEXT: and w13, w1, #0x2000000
+; CHECK-NEON-NEXT: mul w16, w0, w16
+; CHECK-NEON-NEXT: eor w15, w15, w18
+; CHECK-NEON-NEXT: and w18, w1, #0x100000
+; CHECK-NEON-NEXT: mul w2, w0, w2
+; CHECK-NEON-NEXT: eor w8, w8, w15
+; CHECK-NEON-NEXT: and w15, w1, #0x200000
+; CHECK-NEON-NEXT: mul w11, w0, w11
+; CHECK-NEON-NEXT: eor w12, w14, w12
+; CHECK-NEON-NEXT: and w14, w1, #0x4000000
+; CHECK-NEON-NEXT: mul w17, w0, w17
+; CHECK-NEON-NEXT: eor w12, w12, w16
+; CHECK-NEON-NEXT: and w16, w1, #0x8000000
+; CHECK-NEON-NEXT: mul w10, w0, w10
+; CHECK-NEON-NEXT: eor w8, w8, w9
+; CHECK-NEON-NEXT: mul w13, w0, w13
+; CHECK-NEON-NEXT: eor w11, w2, w11
+; CHECK-NEON-NEXT: and w2, w1, #0x20000000
+; CHECK-NEON-NEXT: mul w18, w0, w18
+; CHECK-NEON-NEXT: eor w12, w12, w17
+; CHECK-NEON-NEXT: and w17, w1, #0x10000000
+; CHECK-NEON-NEXT: mul w14, w0, w14
+; CHECK-NEON-NEXT: eor w10, w11, w10
+; CHECK-NEON-NEXT: and w11, w1, #0x40000000
+; CHECK-NEON-NEXT: mul w15, w0, w15
+; CHECK-NEON-NEXT: eor w10, w10, w13
+; CHECK-NEON-NEXT: and w13, w1, #0x80000000
+; CHECK-NEON-NEXT: mul w16, w0, w16
+; CHECK-NEON-NEXT: eor w12, w12, w18
+; CHECK-NEON-NEXT: mul w17, w0, w17
+; CHECK-NEON-NEXT: eor w10, w10, w14
+; CHECK-NEON-NEXT: mul w2, w0, w2
+; CHECK-NEON-NEXT: eor w9, w12, w15
+; CHECK-NEON-NEXT: mul w11, w0, w11
+; CHECK-NEON-NEXT: eor w10, w10, w16
+; CHECK-NEON-NEXT: eor w8, w8, w9
+; CHECK-NEON-NEXT: mul w13, w0, w13
+; CHECK-NEON-NEXT: eor w9, w10, w17
+; CHECK-NEON-NEXT: eor w8, w8, w9
+; CHECK-NEON-NEXT: eor w10, w2, w11
+; CHECK-NEON-NEXT: eor w9, w10, w13
+; CHECK-NEON-NEXT: eor w0, w8, w9
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_i32:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: fmov s0, w1
+; CHECK-AES-NEXT: fmov s1, w0
+; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: fmov w0, s0
+; CHECK-AES-NEXT: ret
%a = call i32 @llvm.clmul.i32(i32 %x, i32 %y)
ret i32 %a
}
define i64 @clmul_i64(i64 %x, i64 %y) {
-; CHECK-LABEL: clmul_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #304
-; CHECK-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 304
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w26, -64
-; CHECK-NEXT: .cfi_offset w27, -72
-; CHECK-NEXT: .cfi_offset w28, -80
-; CHECK-NEXT: .cfi_offset w30, -88
-; CHECK-NEXT: .cfi_offset w29, -96
-; CHECK-NEXT: and x8, x1, #0x2
-; CHECK-NEXT: mul x9, x0, x8
-; CHECK-NEXT: and x8, x1, #0x1
-; CHECK-NEXT: mul x10, x0, x8
-; CHECK-NEXT: and x8, x1, #0x4
-; CHECK-NEXT: mul x11, x0, x8
-; CHECK-NEXT: and x8, x1, #0x8
-; CHECK-NEXT: mul x13, x0, x8
-; CHECK-NEXT: and x8, x1, #0x10
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: mul x12, x0, x8
-; CHECK-NEXT: and x8, x1, #0x20
-; CHECK-NEXT: mul x14, x0, x8
-; CHECK-NEXT: and x8, x1, #0x40
-; CHECK-NEXT: eor x10, x11, x13
-; CHECK-NEXT: and x11, x1, #0x10000000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #200] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x80
-; CHECK-NEXT: mul x15, x0, x8
-; CHECK-NEXT: and x8, x1, #0x100
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #160] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x200
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #152] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x400
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #184] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x800
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #192] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x1000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #144] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x2000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #136] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x4000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #176] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x8000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #168] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x10000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #120] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x20000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #80] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x40000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #72] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x80000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #104] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x100000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #96] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x200000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #128] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x400000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #112] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x800000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #64] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x1000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #40] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x2000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Reload
-; CHECK-NEXT: str x8, [sp, #32] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x4000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #56] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x8000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #48] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x10000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #88] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x20000000
-; CHECK-NEXT: mul x26, x0, x8
-; CHECK-NEXT: and x8, x1, #0x40000000
-; CHECK-NEXT: mul x22, x0, x8
-; CHECK-NEXT: and x8, x1, #0x80000000
-; CHECK-NEXT: mul x23, x0, x8
-; CHECK-NEXT: and x8, x1, #0x100000000
-; CHECK-NEXT: mul x24, x0, x8
-; CHECK-NEXT: and x8, x1, #0x200000000
-; CHECK-NEXT: eor x22, x26, x22
-; CHECK-NEXT: ldr x26, [sp, #32] // 8-byte Reload
-; CHECK-NEXT: mul x25, x0, x8
-; CHECK-NEXT: and x8, x1, #0x400000000
-; CHECK-NEXT: eor x22, x22, x23
-; CHECK-NEXT: and x23, x1, #0x400000000000000
-; CHECK-NEXT: mul x27, x0, x8
-; CHECK-NEXT: and x8, x1, #0x800000000
-; CHECK-NEXT: eor x22, x22, x24
-; CHECK-NEXT: ldr x24, [sp, #48] // 8-byte Reload
-; CHECK-NEXT: mul x28, x0, x8
-; CHECK-NEXT: and x8, x1, #0x1000000000
-; CHECK-NEXT: eor x22, x22, x25
-; CHECK-NEXT: ldr x25, [sp, #88] // 8-byte Reload
-; CHECK-NEXT: mul x29, x0, x8
-; CHECK-NEXT: and x8, x1, #0x2000000000
-; CHECK-NEXT: eor x22, x22, x27
-; CHECK-NEXT: mul x21, x0, x8
-; CHECK-NEXT: and x8, x1, #0x4000000000
-; CHECK-NEXT: mul x7, x0, x8
-; CHECK-NEXT: and x8, x1, #0x8000000000
-; CHECK-NEXT: mul x19, x0, x8
-; CHECK-NEXT: and x8, x1, #0x10000000000
-; CHECK-NEXT: mul x5, x0, x8
-; CHECK-NEXT: and x8, x1, #0x20000000000
-; CHECK-NEXT: eor x7, x21, x7
-; CHECK-NEXT: mul x6, x0, x8
-; CHECK-NEXT: and x8, x1, #0x40000000000
-; CHECK-NEXT: mul x20, x0, x8
-; CHECK-NEXT: and x8, x1, #0x80000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: mul x23, x0, x23
-; CHECK-NEXT: str x8, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x100000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x200000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #8] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x400000000000
-; CHECK-NEXT: mul x4, x0, x8
-; CHECK-NEXT: and x8, x1, #0x800000000000
-; CHECK-NEXT: mul x17, x0, x8
-; CHECK-NEXT: and x8, x1, #0x1000000000000
-; CHECK-NEXT: mul x18, x0, x8
-; CHECK-NEXT: and x8, x1, #0x2000000000000
-; CHECK-NEXT: mul x3, x0, x8
-; CHECK-NEXT: and x8, x1, #0x4000000000000
-; CHECK-NEXT: eor x17, x4, x17
-; CHECK-NEXT: mul x2, x0, x8
-; CHECK-NEXT: and x8, x1, #0x8000000000000
-; CHECK-NEXT: eor x17, x17, x18
-; CHECK-NEXT: and x18, x1, #0x4000000000000000
-; CHECK-NEXT: mul x16, x0, x8
-; CHECK-NEXT: eor x8, x9, x10
-; CHECK-NEXT: ldr x9, [sp, #160] // 8-byte Reload
-; CHECK-NEXT: eor x10, x12, x14
-; CHECK-NEXT: ldr x12, [sp, #80] // 8-byte Reload
-; CHECK-NEXT: eor x17, x17, x3
-; CHECK-NEXT: eor x9, x15, x9
-; CHECK-NEXT: mul x15, x0, x11
-; CHECK-NEXT: ldr x11, [sp, #200] // 8-byte Reload
-; CHECK-NEXT: eor x17, x17, x2
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #152] // 8-byte Reload
-; CHECK-NEXT: mul x18, x0, x18
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldr x10, [sp, #184] // 8-byte Reload
-; CHECK-NEXT: eor x16, x17, x16
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: and x11, x1, #0x20000000000000
-; CHECK-NEXT: ldr x17, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: mul x14, x0, x11
-; CHECK-NEXT: and x10, x1, #0x40000000000000
-; CHECK-NEXT: eor x11, x8, x9
-; CHECK-NEXT: ldr x8, [sp, #192] // 8-byte Reload
-; CHECK-NEXT: ldr x9, [sp, #144] // 8-byte Reload
-; CHECK-NEXT: mul x13, x0, x10
-; CHECK-NEXT: ldr x10, [sp, #136] // 8-byte Reload
-; CHECK-NEXT: eor x15, x16, x15
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #120] // 8-byte Reload
-; CHECK-NEXT: ldr x16, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldr x10, [sp, #72] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #176] // 8-byte Reload
-; CHECK-NEXT: eor x14, x15, x14
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: and x10, x1, #0x80000000000000
-; CHECK-NEXT: ldr x15, [sp, #8] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x12
-; CHECK-NEXT: ldr x12, [sp, #104] // 8-byte Reload
-; CHECK-NEXT: eor x13, x14, x13
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: mul x12, x0, x10
-; CHECK-NEXT: ldr x10, [sp, #168] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldr x10, [sp, #96] // 8-byte Reload
-; CHECK-NEXT: eor x8, x11, x8
-; CHECK-NEXT: ldr x11, [sp, #128] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: and x10, x1, #0x100000000000000
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: ldr x11, [sp, #64] // 8-byte Reload
-; CHECK-NEXT: mul x10, x0, x10
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldr x9, [sp, #112] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: and x11, x1, #0x200000000000000
-; CHECK-NEXT: eor x9, x9, x30
-; CHECK-NEXT: mul x11, x0, x11
-; CHECK-NEXT: eor x9, x9, x26
-; CHECK-NEXT: ldr x26, [sp, #56] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x26
-; CHECK-NEXT: eor x9, x9, x24
-; CHECK-NEXT: and x24, x1, #0x800000000000000
-; CHECK-NEXT: eor x9, x9, x25
-; CHECK-NEXT: mul x24, x0, x24
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x22, x28
-; CHECK-NEXT: and x22, x1, #0x1000000000000000
-; CHECK-NEXT: eor x9, x9, x29
-; CHECK-NEXT: mul x21, x0, x22
-; CHECK-NEXT: and x11, x1, #0x8000000000000000
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x7, x19
-; CHECK-NEXT: and x7, x1, #0x2000000000000000
-; CHECK-NEXT: eor x9, x9, x5
-; CHECK-NEXT: mul x4, x0, x7
-; CHECK-NEXT: eor x10, x10, x23
-; CHECK-NEXT: eor x9, x9, x6
-; CHECK-NEXT: eor x10, x10, x24
-; CHECK-NEXT: eor x9, x9, x20
-; CHECK-NEXT: mul x11, x0, x11
-; CHECK-NEXT: eor x9, x9, x17
-; CHECK-NEXT: eor x10, x10, x21
-; CHECK-NEXT: eor x9, x9, x16
-; CHECK-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload
-; CHECK-NEXT: eor x9, x9, x15
-; CHECK-NEXT: eor x10, x10, x4
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x13, x12
-; CHECK-NEXT: eor x10, x10, x18
-; CHECK-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload
-; CHECK-NEXT: eor x9, x10, x11
-; CHECK-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload
-; CHECK-NEXT: eor x0, x8, x9
-; CHECK-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #304
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_i64:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: sub sp, sp, #304
+; CHECK-NEON-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: .cfi_def_cfa_offset 304
+; CHECK-NEON-NEXT: .cfi_offset w19, -8
+; CHECK-NEON-NEXT: .cfi_offset w20, -16
+; CHECK-NEON-NEXT: .cfi_offset w21, -24
+; CHECK-NEON-NEXT: .cfi_offset w22, -32
+; CHECK-NEON-NEXT: .cfi_offset w23, -40
+; CHECK-NEON-NEXT: .cfi_offset w24, -48
+; CHECK-NEON-NEXT: .cfi_offset w25, -56
+; CHECK-NEON-NEXT: .cfi_offset w26, -64
+; CHECK-NEON-NEXT: .cfi_offset w27, -72
+; CHECK-NEON-NEXT: .cfi_offset w28, -80
+; CHECK-NEON-NEXT: .cfi_offset w30, -88
+; CHECK-NEON-NEXT: .cfi_offset w29, -96
+; CHECK-NEON-NEXT: and x8, x1, #0x2
+; CHECK-NEON-NEXT: mul x9, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x1
+; CHECK-NEON-NEXT: mul x10, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x4
+; CHECK-NEON-NEXT: mul x11, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x8
+; CHECK-NEON-NEXT: mul x13, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x10
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: mul x12, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x20
+; CHECK-NEON-NEXT: mul x14, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x40
+; CHECK-NEON-NEXT: eor x10, x11, x13
+; CHECK-NEON-NEXT: and x11, x1, #0x10000000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #200] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x80
+; CHECK-NEON-NEXT: mul x15, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x100
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #160] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x200
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #152] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x400
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #184] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x800
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #192] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x1000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #144] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x2000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x4000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #176] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x8000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #168] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x10000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x20000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #80] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x40000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #72] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x80000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x100000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x200000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x400000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x800000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x1000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #40] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x2000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: ldr x30, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT: str x8, [sp, #32] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x4000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x8000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x10000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x20000000
+; CHECK-NEON-NEXT: mul x26, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x40000000
+; CHECK-NEON-NEXT: mul x22, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x80000000
+; CHECK-NEON-NEXT: mul x23, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x100000000
+; CHECK-NEON-NEXT: mul x24, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x200000000
+; CHECK-NEON-NEXT: eor x22, x26, x22
+; CHECK-NEON-NEXT: ldr x26, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x25, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x400000000
+; CHECK-NEON-NEXT: eor x22, x22, x23
+; CHECK-NEON-NEXT: and x23, x1, #0x400000000000000
+; CHECK-NEON-NEXT: mul x27, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x800000000
+; CHECK-NEON-NEXT: eor x22, x22, x24
+; CHECK-NEON-NEXT: ldr x24, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x28, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x1000000000
+; CHECK-NEON-NEXT: eor x22, x22, x25
+; CHECK-NEON-NEXT: ldr x25, [sp, #88] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x29, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x2000000000
+; CHECK-NEON-NEXT: eor x22, x22, x27
+; CHECK-NEON-NEXT: mul x21, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x4000000000
+; CHECK-NEON-NEXT: mul x7, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x8000000000
+; CHECK-NEON-NEXT: mul x19, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x10000000000
+; CHECK-NEON-NEXT: mul x5, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x20000000000
+; CHECK-NEON-NEXT: eor x7, x21, x7
+; CHECK-NEON-NEXT: mul x6, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x40000000000
+; CHECK-NEON-NEXT: mul x20, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x80000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: mul x23, x0, x23
+; CHECK-NEON-NEXT: str x8, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x100000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x200000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #8] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x400000000000
+; CHECK-NEON-NEXT: mul x4, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x800000000000
+; CHECK-NEON-NEXT: mul x17, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x1000000000000
+; CHECK-NEON-NEXT: mul x18, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x2000000000000
+; CHECK-NEON-NEXT: mul x3, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x4000000000000
+; CHECK-NEON-NEXT: eor x17, x4, x17
+; CHECK-NEON-NEXT: mul x2, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x8000000000000
+; CHECK-NEON-NEXT: eor x17, x17, x18
+; CHECK-NEON-NEXT: and x18, x1, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x16, x0, x8
+; CHECK-NEON-NEXT: eor x8, x9, x10
+; CHECK-NEON-NEXT: ldr x9, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x12, x14
+; CHECK-NEON-NEXT: ldr x12, [sp, #80] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x17, x17, x3
+; CHECK-NEON-NEXT: eor x9, x15, x9
+; CHECK-NEON-NEXT: mul x15, x0, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #200] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x17, x17, x2
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x18, x0, x18
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #184] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x16, x17, x16
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: and x11, x1, #0x20000000000000
+; CHECK-NEON-NEXT: ldr x17, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: mul x14, x0, x11
+; CHECK-NEON-NEXT: and x10, x1, #0x40000000000000
+; CHECK-NEON-NEXT: eor x11, x8, x9
+; CHECK-NEON-NEXT: ldr x8, [sp, #192] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x9, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x13, x0, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x15, x16, x15
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #120] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x16, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #176] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x14, x15, x14
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: and x10, x1, #0x80000000000000
+; CHECK-NEON-NEXT: ldr x15, [sp, #8] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #104] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x14, x13
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: mul x12, x0, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #168] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x11, x8
+; CHECK-NEON-NEXT: ldr x11, [sp, #128] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: and x10, x1, #0x100000000000000
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #64] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x10, x0, x10
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldr x9, [sp, #112] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: and x11, x1, #0x200000000000000
+; CHECK-NEON-NEXT: eor x9, x9, x30
+; CHECK-NEON-NEXT: mul x11, x0, x11
+; CHECK-NEON-NEXT: eor x9, x9, x26
+; CHECK-NEON-NEXT: ldr x26, [sp, #56] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x26
+; CHECK-NEON-NEXT: eor x9, x9, x24
+; CHECK-NEON-NEXT: and x24, x1, #0x800000000000000
+; CHECK-NEON-NEXT: eor x9, x9, x25
+; CHECK-NEON-NEXT: mul x24, x0, x24
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x22, x28
+; CHECK-NEON-NEXT: and x22, x1, #0x1000000000000000
+; CHECK-NEON-NEXT: eor x9, x9, x29
+; CHECK-NEON-NEXT: mul x21, x0, x22
+; CHECK-NEON-NEXT: and x11, x1, #0x8000000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x7, x19
+; CHECK-NEON-NEXT: and x7, x1, #0x2000000000000000
+; CHECK-NEON-NEXT: eor x9, x9, x5
+; CHECK-NEON-NEXT: mul x4, x0, x7
+; CHECK-NEON-NEXT: eor x10, x10, x23
+; CHECK-NEON-NEXT: eor x9, x9, x6
+; CHECK-NEON-NEXT: eor x10, x10, x24
+; CHECK-NEON-NEXT: eor x9, x9, x20
+; CHECK-NEON-NEXT: mul x11, x0, x11
+; CHECK-NEON-NEXT: eor x9, x9, x17
+; CHECK-NEON-NEXT: eor x10, x10, x21
+; CHECK-NEON-NEXT: eor x9, x9, x16
+; CHECK-NEON-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x9, x9, x15
+; CHECK-NEON-NEXT: eor x10, x10, x4
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x13, x12
+; CHECK-NEON-NEXT: eor x10, x10, x18
+; CHECK-NEON-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x9, x10, x11
+; CHECK-NEON-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x0, x8, x9
+; CHECK-NEON-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: add sp, sp, #304
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_i64:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: fmov d0, x1
+; CHECK-AES-NEXT: fmov d1, x0
+; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: fmov x0, d0
+; CHECK-AES-NEXT: ret
%a = call i64 @llvm.clmul.i64(i64 %x, i64 %y)
ret i64 %a
}
define i16 @clmul_i16_zext(i8 %x, i8 %y) {
-; CHECK-LABEL: clmul_i16_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: and w9, w1, #0x2
-; CHECK-NEXT: and w10, w1, #0x1
-; CHECK-NEXT: mul w9, w8, w9
-; CHECK-NEXT: and w11, w1, #0x4
-; CHECK-NEXT: and w12, w1, #0x8
-; CHECK-NEXT: mul w10, w8, w10
-; CHECK-NEXT: and w13, w1, #0x10
-; CHECK-NEXT: and w14, w1, #0x20
-; CHECK-NEXT: mul w11, w8, w11
-; CHECK-NEXT: and w15, w1, #0x40
-; CHECK-NEXT: mul w12, w8, w12
-; CHECK-NEXT: mul w13, w8, w13
-; CHECK-NEXT: eor w9, w10, w9
-; CHECK-NEXT: and w10, w1, #0x80
-; CHECK-NEXT: mul w14, w8, w14
-; CHECK-NEXT: mul w15, w8, w15
-; CHECK-NEXT: eor w11, w11, w12
-; CHECK-NEXT: mul w8, w8, w10
-; CHECK-NEXT: eor w9, w9, w11
-; CHECK-NEXT: eor w12, w13, w14
-; CHECK-NEXT: eor w10, w12, w15
-; CHECK-NEXT: eor w9, w9, w10
-; CHECK-NEXT: eor w0, w9, w8
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_i16_zext:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: and w8, w0, #0xff
+; CHECK-NEON-NEXT: and w9, w1, #0x2
+; CHECK-NEON-NEXT: and w10, w1, #0x1
+; CHECK-NEON-NEXT: mul w9, w8, w9
+; CHECK-NEON-NEXT: and w11, w1, #0x4
+; CHECK-NEON-NEXT: and w12, w1, #0x8
+; CHECK-NEON-NEXT: mul w10, w8, w10
+; CHECK-NEON-NEXT: and w13, w1, #0x10
+; CHECK-NEON-NEXT: and w14, w1, #0x20
+; CHECK-NEON-NEXT: mul w11, w8, w11
+; CHECK-NEON-NEXT: and w15, w1, #0x40
+; CHECK-NEON-NEXT: mul w12, w8, w12
+; CHECK-NEON-NEXT: mul w13, w8, w13
+; CHECK-NEON-NEXT: eor w9, w10, w9
+; CHECK-NEON-NEXT: and w10, w1, #0x80
+; CHECK-NEON-NEXT: mul w14, w8, w14
+; CHECK-NEON-NEXT: mul w15, w8, w15
+; CHECK-NEON-NEXT: eor w11, w11, w12
+; CHECK-NEON-NEXT: mul w8, w8, w10
+; CHECK-NEON-NEXT: eor w9, w9, w11
+; CHECK-NEON-NEXT: eor w12, w13, w14
+; CHECK-NEON-NEXT: eor w10, w12, w15
+; CHECK-NEON-NEXT: eor w9, w9, w10
+; CHECK-NEON-NEXT: eor w0, w9, w8
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_i16_zext:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: and w8, w0, #0xff
+; CHECK-AES-NEXT: and w9, w1, #0xff
+; CHECK-AES-NEXT: fmov s0, w9
+; CHECK-AES-NEXT: fmov s1, w8
+; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: fmov w0, s0
+; CHECK-AES-NEXT: ret
%zextx = zext i8 %x to i16
%zexty = zext i8 %y to i16
%a = call i16 @llvm.clmul.i16(i16 %zextx, i16 %zexty)
@@ -501,57 +517,67 @@ define i16 @clmul_i16_zext(i8 %x, i8 %y) {
}
define i32 @clmul_i32_zext(i16 %x, i16 %y) {
-; CHECK-LABEL: clmul_i32_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: and w9, w1, #0x2
-; CHECK-NEXT: and w10, w1, #0x1
-; CHECK-NEXT: mul w9, w8, w9
-; CHECK-NEXT: and w11, w1, #0x4
-; CHECK-NEXT: and w12, w1, #0x8
-; CHECK-NEXT: mul w10, w8, w10
-; CHECK-NEXT: and w13, w1, #0x10
-; CHECK-NEXT: and w14, w1, #0x20
-; CHECK-NEXT: mul w11, w8, w11
-; CHECK-NEXT: and w16, w1, #0x80
-; CHECK-NEXT: and w17, w1, #0x100
-; CHECK-NEXT: mul w12, w8, w12
-; CHECK-NEXT: and w2, w1, #0x800
-; CHECK-NEXT: and w15, w1, #0x40
-; CHECK-NEXT: mul w13, w8, w13
-; CHECK-NEXT: eor w9, w10, w9
-; CHECK-NEXT: and w10, w1, #0x1000
-; CHECK-NEXT: mul w14, w8, w14
-; CHECK-NEXT: and w18, w1, #0x200
-; CHECK-NEXT: and w0, w1, #0x400
-; CHECK-NEXT: mul w16, w8, w16
-; CHECK-NEXT: eor w11, w11, w12
-; CHECK-NEXT: and w12, w1, #0x2000
-; CHECK-NEXT: mul w17, w8, w17
-; CHECK-NEXT: eor w9, w9, w11
-; CHECK-NEXT: and w11, w1, #0x4000
-; CHECK-NEXT: mul w2, w8, w2
-; CHECK-NEXT: eor w13, w13, w14
-; CHECK-NEXT: and w14, w1, #0x8000
-; CHECK-NEXT: mul w10, w8, w10
-; CHECK-NEXT: mul w15, w8, w15
-; CHECK-NEXT: eor w16, w16, w17
-; CHECK-NEXT: mul w18, w8, w18
-; CHECK-NEXT: mul w12, w8, w12
-; CHECK-NEXT: eor w10, w2, w10
-; CHECK-NEXT: mul w0, w8, w0
-; CHECK-NEXT: eor w13, w13, w15
-; CHECK-NEXT: mul w11, w8, w11
-; CHECK-NEXT: eor w9, w9, w13
-; CHECK-NEXT: mul w8, w8, w14
-; CHECK-NEXT: eor w14, w16, w18
-; CHECK-NEXT: eor w10, w10, w12
-; CHECK-NEXT: eor w12, w14, w0
-; CHECK-NEXT: eor w10, w10, w11
-; CHECK-NEXT: eor w9, w9, w12
-; CHECK-NEXT: eor w8, w10, w8
-; CHECK-NEXT: eor w0, w9, w8
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_i32_zext:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: and w8, w0, #0xffff
+; CHECK-NEON-NEXT: and w9, w1, #0x2
+; CHECK-NEON-NEXT: and w10, w1, #0x1
+; CHECK-NEON-NEXT: mul w9, w8, w9
+; CHECK-NEON-NEXT: and w11, w1, #0x4
+; CHECK-NEON-NEXT: and w12, w1, #0x8
+; CHECK-NEON-NEXT: mul w10, w8, w10
+; CHECK-NEON-NEXT: and w13, w1, #0x10
+; CHECK-NEON-NEXT: and w14, w1, #0x20
+; CHECK-NEON-NEXT: mul w11, w8, w11
+; CHECK-NEON-NEXT: and w16, w1, #0x80
+; CHECK-NEON-NEXT: and w17, w1, #0x100
+; CHECK-NEON-NEXT: mul w12, w8, w12
+; CHECK-NEON-NEXT: and w2, w1, #0x800
+; CHECK-NEON-NEXT: and w15, w1, #0x40
+; CHECK-NEON-NEXT: mul w13, w8, w13
+; CHECK-NEON-NEXT: eor w9, w10, w9
+; CHECK-NEON-NEXT: and w10, w1, #0x1000
+; CHECK-NEON-NEXT: mul w14, w8, w14
+; CHECK-NEON-NEXT: and w18, w1, #0x200
+; CHECK-NEON-NEXT: and w0, w1, #0x400
+; CHECK-NEON-NEXT: mul w16, w8, w16
+; CHECK-NEON-NEXT: eor w11, w11, w12
+; CHECK-NEON-NEXT: and w12, w1, #0x2000
+; CHECK-NEON-NEXT: mul w17, w8, w17
+; CHECK-NEON-NEXT: eor w9, w9, w11
+; CHECK-NEON-NEXT: and w11, w1, #0x4000
+; CHECK-NEON-NEXT: mul w2, w8, w2
+; CHECK-NEON-NEXT: eor w13, w13, w14
+; CHECK-NEON-NEXT: and w14, w1, #0x8000
+; CHECK-NEON-NEXT: mul w10, w8, w10
+; CHECK-NEON-NEXT: mul w15, w8, w15
+; CHECK-NEON-NEXT: eor w16, w16, w17
+; CHECK-NEON-NEXT: mul w18, w8, w18
+; CHECK-NEON-NEXT: mul w12, w8, w12
+; CHECK-NEON-NEXT: eor w10, w2, w10
+; CHECK-NEON-NEXT: mul w0, w8, w0
+; CHECK-NEON-NEXT: eor w13, w13, w15
+; CHECK-NEON-NEXT: mul w11, w8, w11
+; CHECK-NEON-NEXT: eor w9, w9, w13
+; CHECK-NEON-NEXT: mul w8, w8, w14
+; CHECK-NEON-NEXT: eor w14, w16, w18
+; CHECK-NEON-NEXT: eor w10, w10, w12
+; CHECK-NEON-NEXT: eor w12, w14, w0
+; CHECK-NEON-NEXT: eor w10, w10, w11
+; CHECK-NEON-NEXT: eor w9, w9, w12
+; CHECK-NEON-NEXT: eor w8, w10, w8
+; CHECK-NEON-NEXT: eor w0, w9, w8
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_i32_zext:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: and w8, w0, #0xffff
+; CHECK-AES-NEXT: and w9, w1, #0xffff
+; CHECK-AES-NEXT: fmov s0, w9
+; CHECK-AES-NEXT: fmov s1, w8
+; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: fmov w0, s0
+; CHECK-AES-NEXT: ret
%zextx = zext i16 %x to i32
%zexty = zext i16 %y to i32
%a = call i32 @llvm.clmul.i32(i32 %zextx, i32 %zexty)
@@ -559,105 +585,115 @@ define i32 @clmul_i32_zext(i16 %x, i16 %y) {
}
define i64 @clmul_i64_zext(i32 %x, i32 %y) {
-; CHECK-LABEL: clmul_i64_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w1
-; CHECK-NEXT: and x9, x8, #0x2
-; CHECK-NEXT: and x10, x8, #0x1
-; CHECK-NEXT: and x11, x8, #0x4
-; CHECK-NEXT: umull x9, w9, w0
-; CHECK-NEXT: and x12, x8, #0x8
-; CHECK-NEXT: and x13, x8, #0x10
-; CHECK-NEXT: umull x10, w10, w0
-; CHECK-NEXT: and x14, x8, #0x20
-; CHECK-NEXT: and x15, x8, #0x40
-; CHECK-NEXT: umull x11, w11, w0
-; CHECK-NEXT: and x2, x8, #0x800
-; CHECK-NEXT: and x16, x8, #0x80
-; CHECK-NEXT: umull x12, w12, w0
-; CHECK-NEXT: and x17, x8, #0x100
-; CHECK-NEXT: and x18, x8, #0x200
-; CHECK-NEXT: umull x13, w13, w0
-; CHECK-NEXT: eor x9, x10, x9
-; CHECK-NEXT: and x10, x8, #0x1000
-; CHECK-NEXT: umull x14, w14, w0
-; CHECK-NEXT: and x1, x8, #0x400
-; CHECK-NEXT: umull x15, w15, w0
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: and x12, x8, #0x2000
-; CHECK-NEXT: umull x2, w2, w0
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: and x11, x8, #0x4000
-; CHECK-NEXT: umull x10, w10, w0
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: and x14, x8, #0x8000
-; CHECK-NEXT: umull x16, w16, w0
-; CHECK-NEXT: eor x13, x13, x15
-; CHECK-NEXT: and x15, x8, #0x10000
-; CHECK-NEXT: umull x17, w17, w0
-; CHECK-NEXT: eor x9, x9, x13
-; CHECK-NEXT: and x13, x8, #0x20000
-; CHECK-NEXT: umull x12, w12, w0
-; CHECK-NEXT: eor x10, x2, x10
-; CHECK-NEXT: and x2, x8, #0x400000
-; CHECK-NEXT: umull x18, w18, w0
-; CHECK-NEXT: umull x11, w11, w0
-; CHECK-NEXT: eor x16, x16, x17
-; CHECK-NEXT: and x17, x8, #0x40000
-; CHECK-NEXT: umull x14, w14, w0
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: and x12, x8, #0x800000
-; CHECK-NEXT: umull x1, w1, w0
-; CHECK-NEXT: eor x16, x16, x18
-; CHECK-NEXT: and x18, x8, #0x80000
-; CHECK-NEXT: umull x15, w15, w0
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: and x11, x8, #0x1000000
-; CHECK-NEXT: umull x13, w13, w0
-; CHECK-NEXT: eor x10, x10, x14
-; CHECK-NEXT: and x14, x8, #0x2000000
-; CHECK-NEXT: umull x17, w17, w0
-; CHECK-NEXT: eor x16, x16, x1
-; CHECK-NEXT: and x1, x8, #0x100000
-; CHECK-NEXT: umull x2, w2, w0
-; CHECK-NEXT: eor x9, x9, x16
-; CHECK-NEXT: and x16, x8, #0x200000
-; CHECK-NEXT: umull x12, w12, w0
-; CHECK-NEXT: eor x13, x15, x13
-; CHECK-NEXT: and x15, x8, #0x4000000
-; CHECK-NEXT: umull x18, w18, w0
-; CHECK-NEXT: eor x13, x13, x17
-; CHECK-NEXT: and x17, x8, #0x8000000
-; CHECK-NEXT: umull x11, w11, w0
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: umull x14, w14, w0
-; CHECK-NEXT: eor x12, x2, x12
-; CHECK-NEXT: and x2, x8, #0x20000000
-; CHECK-NEXT: umull x1, w1, w0
-; CHECK-NEXT: eor x13, x13, x18
-; CHECK-NEXT: and x18, x8, #0x10000000
-; CHECK-NEXT: umull x15, w15, w0
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: and x12, x8, #0x40000000
-; CHECK-NEXT: umull x16, w16, w0
-; CHECK-NEXT: eor x11, x11, x14
-; CHECK-NEXT: and x8, x8, #0x80000000
-; CHECK-NEXT: umull x17, w17, w0
-; CHECK-NEXT: eor x13, x13, x1
-; CHECK-NEXT: umull x18, w18, w0
-; CHECK-NEXT: eor x11, x11, x15
-; CHECK-NEXT: umull x2, w2, w0
-; CHECK-NEXT: eor x10, x13, x16
-; CHECK-NEXT: umull x12, w12, w0
-; CHECK-NEXT: eor x11, x11, x17
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: umull x8, w8, w0
-; CHECK-NEXT: eor x10, x11, x18
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: eor x11, x2, x12
-; CHECK-NEXT: eor x8, x11, x8
-; CHECK-NEXT: eor x0, x9, x8
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_i64_zext:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: mov w8, w1
+; CHECK-NEON-NEXT: and x9, x8, #0x2
+; CHECK-NEON-NEXT: and x10, x8, #0x1
+; CHECK-NEON-NEXT: and x11, x8, #0x4
+; CHECK-NEON-NEXT: umull x9, w9, w0
+; CHECK-NEON-NEXT: and x12, x8, #0x8
+; CHECK-NEON-NEXT: and x13, x8, #0x10
+; CHECK-NEON-NEXT: umull x10, w10, w0
+; CHECK-NEON-NEXT: and x14, x8, #0x20
+; CHECK-NEON-NEXT: and x15, x8, #0x40
+; CHECK-NEON-NEXT: umull x11, w11, w0
+; CHECK-NEON-NEXT: and x2, x8, #0x800
+; CHECK-NEON-NEXT: and x16, x8, #0x80
+; CHECK-NEON-NEXT: umull x12, w12, w0
+; CHECK-NEON-NEXT: and x17, x8, #0x100
+; CHECK-NEON-NEXT: and x18, x8, #0x200
+; CHECK-NEON-NEXT: umull x13, w13, w0
+; CHECK-NEON-NEXT: eor x9, x10, x9
+; CHECK-NEON-NEXT: and x10, x8, #0x1000
+; CHECK-NEON-NEXT: umull x14, w14, w0
+; CHECK-NEON-NEXT: and x1, x8, #0x400
+; CHECK-NEON-NEXT: umull x15, w15, w0
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: and x12, x8, #0x2000
+; CHECK-NEON-NEXT: umull x2, w2, w0
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: and x11, x8, #0x4000
+; CHECK-NEON-NEXT: umull x10, w10, w0
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: and x14, x8, #0x8000
+; CHECK-NEON-NEXT: umull x16, w16, w0
+; CHECK-NEON-NEXT: eor x13, x13, x15
+; CHECK-NEON-NEXT: and x15, x8, #0x10000
+; CHECK-NEON-NEXT: umull x17, w17, w0
+; CHECK-NEON-NEXT: eor x9, x9, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x20000
+; CHECK-NEON-NEXT: umull x12, w12, w0
+; CHECK-NEON-NEXT: eor x10, x2, x10
+; CHECK-NEON-NEXT: and x2, x8, #0x400000
+; CHECK-NEON-NEXT: umull x18, w18, w0
+; CHECK-NEON-NEXT: umull x11, w11, w0
+; CHECK-NEON-NEXT: eor x16, x16, x17
+; CHECK-NEON-NEXT: and x17, x8, #0x40000
+; CHECK-NEON-NEXT: umull x14, w14, w0
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: and x12, x8, #0x800000
+; CHECK-NEON-NEXT: umull x1, w1, w0
+; CHECK-NEON-NEXT: eor x16, x16, x18
+; CHECK-NEON-NEXT: and x18, x8, #0x80000
+; CHECK-NEON-NEXT: umull x15, w15, w0
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: and x11, x8, #0x1000000
+; CHECK-NEON-NEXT: umull x13, w13, w0
+; CHECK-NEON-NEXT: eor x10, x10, x14
+; CHECK-NEON-NEXT: and x14, x8, #0x2000000
+; CHECK-NEON-NEXT: umull x17, w17, w0
+; CHECK-NEON-NEXT: eor x16, x16, x1
+; CHECK-NEON-NEXT: and x1, x8, #0x100000
+; CHECK-NEON-NEXT: umull x2, w2, w0
+; CHECK-NEON-NEXT: eor x9, x9, x16
+; CHECK-NEON-NEXT: and x16, x8, #0x200000
+; CHECK-NEON-NEXT: umull x12, w12, w0
+; CHECK-NEON-NEXT: eor x13, x15, x13
+; CHECK-NEON-NEXT: and x15, x8, #0x4000000
+; CHECK-NEON-NEXT: umull x18, w18, w0
+; CHECK-NEON-NEXT: eor x13, x13, x17
+; CHECK-NEON-NEXT: and x17, x8, #0x8000000
+; CHECK-NEON-NEXT: umull x11, w11, w0
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: umull x14, w14, w0
+; CHECK-NEON-NEXT: eor x12, x2, x12
+; CHECK-NEON-NEXT: and x2, x8, #0x20000000
+; CHECK-NEON-NEXT: umull x1, w1, w0
+; CHECK-NEON-NEXT: eor x13, x13, x18
+; CHECK-NEON-NEXT: and x18, x8, #0x10000000
+; CHECK-NEON-NEXT: umull x15, w15, w0
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: and x12, x8, #0x40000000
+; CHECK-NEON-NEXT: umull x16, w16, w0
+; CHECK-NEON-NEXT: eor x11, x11, x14
+; CHECK-NEON-NEXT: and x8, x8, #0x80000000
+; CHECK-NEON-NEXT: umull x17, w17, w0
+; CHECK-NEON-NEXT: eor x13, x13, x1
+; CHECK-NEON-NEXT: umull x18, w18, w0
+; CHECK-NEON-NEXT: eor x11, x11, x15
+; CHECK-NEON-NEXT: umull x2, w2, w0
+; CHECK-NEON-NEXT: eor x10, x13, x16
+; CHECK-NEON-NEXT: umull x12, w12, w0
+; CHECK-NEON-NEXT: eor x11, x11, x17
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: umull x8, w8, w0
+; CHECK-NEON-NEXT: eor x10, x11, x18
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: eor x11, x2, x12
+; CHECK-NEON-NEXT: eor x8, x11, x8
+; CHECK-NEON-NEXT: eor x0, x9, x8
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_i64_zext:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: mov w8, w0
+; CHECK-AES-NEXT: mov w9, w1
+; CHECK-AES-NEXT: fmov d0, x9
+; CHECK-AES-NEXT: fmov d1, x8
+; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: fmov x0, d0
+; CHECK-AES-NEXT: ret
%zextx = zext i32 %x to i64
%zexty = zext i32 %y to i64
%a = call i64 @llvm.clmul.i64(i64 %zextx, i64 %zexty)
@@ -665,557 +701,573 @@ define i64 @clmul_i64_zext(i32 %x, i32 %y) {
}
define i128 @clmul_i128_zext(i64 %x, i64 %y) {
-; CHECK-LABEL: clmul_i128_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: sub sp, sp, #624
-; CHECK-NEXT: .cfi_def_cfa_offset 720
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w26, -64
-; CHECK-NEXT: .cfi_offset w27, -72
-; CHECK-NEXT: .cfi_offset w28, -80
-; CHECK-NEXT: .cfi_offset w30, -88
-; CHECK-NEXT: .cfi_offset w29, -96
-; CHECK-NEXT: and x8, x1, #0x2
-; CHECK-NEXT: mul x11, x0, x8
-; CHECK-NEXT: and x8, x1, #0x1
-; CHECK-NEXT: mul x12, x0, x8
-; CHECK-NEXT: and x8, x1, #0x4
-; CHECK-NEXT: mul x13, x0, x8
-; CHECK-NEXT: and x8, x1, #0x8
-; CHECK-NEXT: mul x14, x0, x8
-; CHECK-NEXT: and x8, x1, #0x10
-; CHECK-NEXT: mul x15, x0, x8
-; CHECK-NEXT: and x8, x1, #0x20
-; CHECK-NEXT: mul x16, x0, x8
-; CHECK-NEXT: and x8, x1, #0x40
-; CHECK-NEXT: mul x17, x0, x8
-; CHECK-NEXT: and x8, x1, #0x80
-; CHECK-NEXT: mul x18, x0, x8
-; CHECK-NEXT: and x8, x1, #0x100
-; CHECK-NEXT: mul x3, x0, x8
-; CHECK-NEXT: and x8, x1, #0x200
-; CHECK-NEXT: mul x2, x0, x8
-; CHECK-NEXT: and x8, x1, #0x400
-; CHECK-NEXT: mul x4, x0, x8
-; CHECK-NEXT: and x8, x1, #0x800
-; CHECK-NEXT: mul x5, x0, x8
-; CHECK-NEXT: and x8, x1, #0x1000
-; CHECK-NEXT: mul x20, x0, x8
-; CHECK-NEXT: and x8, x1, #0x2000
-; CHECK-NEXT: mul x6, x0, x8
-; CHECK-NEXT: and x8, x1, #0x4000
-; CHECK-NEXT: mul x7, x0, x8
-; CHECK-NEXT: and x8, x1, #0x8000
-; CHECK-NEXT: mul x19, x0, x8
-; CHECK-NEXT: and x8, x1, #0x10000
-; CHECK-NEXT: mul x21, x0, x8
-; CHECK-NEXT: and x8, x1, #0x20000
-; CHECK-NEXT: mul x22, x0, x8
-; CHECK-NEXT: and x8, x1, #0x40000
-; CHECK-NEXT: mul x23, x0, x8
-; CHECK-NEXT: and x8, x1, #0x80000
-; CHECK-NEXT: mul x24, x0, x8
-; CHECK-NEXT: and x8, x1, #0x100000
-; CHECK-NEXT: mul x25, x0, x8
-; CHECK-NEXT: and x8, x1, #0x200000
-; CHECK-NEXT: mul x26, x0, x8
-; CHECK-NEXT: and x8, x1, #0x400000
-; CHECK-NEXT: mul x27, x0, x8
-; CHECK-NEXT: and x8, x1, #0x800000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #592] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x1000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #584] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x2000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #616] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x4000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #576] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x8000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #608] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x10000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #600] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x20000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #568] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x40000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #512] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x80000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #536] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x100000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #528] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x200000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #560] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x400000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #520] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x800000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #552] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x1000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #544] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x2000000000
-; CHECK-NEXT: mul x9, x0, x8
-; CHECK-NEXT: and x8, x1, #0x4000000000
-; CHECK-NEXT: mul x10, x0, x8
-; CHECK-NEXT: and x8, x1, #0x8000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #464] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x10000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #456] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x20000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: stp x8, x9, [sp, #488] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x1, #0x40000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: stp x10, x8, [sp, #440] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x1, #0x80000000000
-; CHECK-NEXT: mul x9, x0, x8
-; CHECK-NEXT: and x8, x1, #0x100000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: stp x8, x9, [sp, #472] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x1, #0x200000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #504] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x400000000000
-; CHECK-NEXT: mul x9, x0, x8
-; CHECK-NEXT: and x8, x1, #0x800000000000
-; CHECK-NEXT: mul x10, x0, x8
-; CHECK-NEXT: and x8, x1, #0x1000000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #416] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x2000000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: str x8, [sp, #408] // 8-byte Spill
-; CHECK-NEXT: and x8, x1, #0x4000000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: stp x8, x9, [sp, #424] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x1, #0x8000000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: stp x10, x8, [sp, #392] // 16-byte Folded Spill
-; CHECK-NEXT: and x8, x1, #0x100000000000000
-; CHECK-NEXT: mul x9, x0, x8
-; CHECK-NEXT: and x8, x1, #0x200000000000000
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: stp x8, x9, [sp, #376] // 16-byte Folded Spill
-; CHECK-NEXT: and x9, x1, #0x400000000000000
-; CHECK-NEXT: rbit x8, x1
-; CHECK-NEXT: mul x9, x0, x9
-; CHECK-NEXT: and x10, x8, #0x2
-; CHECK-NEXT: str x9, [sp, #368] // 8-byte Spill
-; CHECK-NEXT: rbit x9, x0
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #360] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #352] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #344] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #336] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #328] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #320] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #312] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #304] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #296] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #288] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #280] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #272] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #256] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #248] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #264] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #240] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #232] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #200] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #224] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #192] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #216] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #208] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #184] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #136] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #168] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #160] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #176] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #152] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #144] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #128] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #120] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #112] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x100000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #104] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x200000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #96] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x400000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #88] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x800000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #80] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x1000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #72] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x2000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #64] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x4000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #56] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x8000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #48] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x10000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #40] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x20000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #32] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x40000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: and x10, x8, #0x80000000000
-; CHECK-NEXT: mul x10, x9, x10
-; CHECK-NEXT: str x10, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: eor x10, x12, x11
-; CHECK-NEXT: and x12, x8, #0x100000000000
-; CHECK-NEXT: mul x12, x9, x12
-; CHECK-NEXT: eor x11, x13, x14
-; CHECK-NEXT: and x13, x8, #0x200000000000
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: eor x11, x15, x16
-; CHECK-NEXT: ldr x14, [sp, #608] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x17
-; CHECK-NEXT: mul x30, x9, x13
-; CHECK-NEXT: and x13, x8, #0x400000000000
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: mul x29, x9, x13
-; CHECK-NEXT: and x13, x8, #0x800000000000
-; CHECK-NEXT: str x12, [sp, #8] // 8-byte Spill
-; CHECK-NEXT: eor x12, x18, x3
-; CHECK-NEXT: eor x11, x12, x2
-; CHECK-NEXT: eor x12, x5, x20
-; CHECK-NEXT: mul x28, x9, x13
-; CHECK-NEXT: eor x11, x11, x4
-; CHECK-NEXT: ldr x13, [sp, #592] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: eor x11, x12, x6
-; CHECK-NEXT: eor x12, x21, x22
-; CHECK-NEXT: eor x11, x11, x7
-; CHECK-NEXT: eor x12, x12, x23
-; CHECK-NEXT: eor x13, x27, x13
-; CHECK-NEXT: eor x11, x11, x19
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: eor x11, x12, x24
-; CHECK-NEXT: and x12, x8, #0x1000000000000
-; CHECK-NEXT: eor x11, x11, x25
-; CHECK-NEXT: mul x27, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #584] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x26
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #616] // 8-byte Reload
-; CHECK-NEXT: eor x12, x13, x12
-; CHECK-NEXT: and x13, x8, #0x2000000000000
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: ldr x12, [sp, #576] // 8-byte Reload
-; CHECK-NEXT: mul x25, x9, x13
-; CHECK-NEXT: ldr x13, [sp, #512] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #568] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x14
-; CHECK-NEXT: ldr x14, [sp, #536] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: and x13, x8, #0x4000000000000
-; CHECK-NEXT: mul x24, x9, x13
-; CHECK-NEXT: ldr x13, [sp, #600] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x14
-; CHECK-NEXT: ldr x14, [sp, #552] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x13
-; CHECK-NEXT: ldr x13, [sp, #528] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #560] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: and x13, x8, #0x8000000000000
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: ldr x12, [sp, #520] // 8-byte Reload
-; CHECK-NEXT: mul x23, x9, x13
-; CHECK-NEXT: ldr x13, [sp, #440] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #496] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x14
-; CHECK-NEXT: ldr x14, [sp, #464] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: and x13, x8, #0x10000000000000
-; CHECK-NEXT: mul x21, x9, x13
-; CHECK-NEXT: ldr x13, [sp, #544] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x14
-; CHECK-NEXT: ldr x14, [sp, #256] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x13
-; CHECK-NEXT: ldr x13, [sp, #456] // 8-byte Reload
-; CHECK-NEXT: eor x26, x10, x11
-; CHECK-NEXT: ldr x10, [sp, #488] // 8-byte Reload
-; CHECK-NEXT: ldr x11, [sp, #448] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: and x13, x8, #0x20000000000000
-; CHECK-NEXT: eor x10, x12, x10
-; CHECK-NEXT: mul x20, x9, x13
-; CHECK-NEXT: ldr x12, [sp, #392] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #432] // 8-byte Reload
-; CHECK-NEXT: ldr x13, [sp, #480] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: and x12, x8, #0x40000000000000
-; CHECK-NEXT: eor x10, x10, x13
-; CHECK-NEXT: ldr x13, [sp, #416] // 8-byte Reload
-; CHECK-NEXT: mul x7, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #472] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x13
-; CHECK-NEXT: ldr x13, [sp, #504] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x12
-; CHECK-NEXT: ldr x12, [sp, #408] // 8-byte Reload
-; CHECK-NEXT: eor x22, x10, x13
-; CHECK-NEXT: ldr x10, [sp, #424] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: and x12, x8, #0x80000000000000
-; CHECK-NEXT: eor x10, x11, x10
-; CHECK-NEXT: ldr x11, [sp, #400] // 8-byte Reload
-; CHECK-NEXT: mul x5, x9, x12
-; CHECK-NEXT: ldr x12, [sp, #368] // 8-byte Reload
-; CHECK-NEXT: eor x19, x10, x11
-; CHECK-NEXT: ldp x11, x10, [sp, #376] // 16-byte Folded Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: and x11, x8, #0x100000000000000
-; CHECK-NEXT: eor x6, x10, x12
-; CHECK-NEXT: ldp x12, x10, [sp, #352] // 16-byte Folded Reload
-; CHECK-NEXT: mul x4, x9, x11
-; CHECK-NEXT: eor x10, x12, x10
-; CHECK-NEXT: ldp x12, x11, [sp, #336] // 16-byte Folded Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldp x13, x12, [sp, #320] // 16-byte Folded Reload
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: ldr x11, [sp, #312] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x13
-; CHECK-NEXT: and x13, x8, #0x200000000000000
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: mul x3, x9, x13
-; CHECK-NEXT: and x13, x8, #0x400000000000000
-; CHECK-NEXT: eor x12, x10, x11
-; CHECK-NEXT: ldp x11, x10, [sp, #296] // 16-byte Folded Reload
-; CHECK-NEXT: mul x2, x9, x13
-; CHECK-NEXT: ldr x13, [sp, #280] // 8-byte Reload
-; CHECK-NEXT: eor x11, x10, x11
-; CHECK-NEXT: ldr x10, [sp, #288] // 8-byte Reload
-; CHECK-NEXT: eor x3, x4, x3
-; CHECK-NEXT: and x4, x1, #0x2000000000000000
-; CHECK-NEXT: eor x11, x11, x10
-; CHECK-NEXT: ldr x10, [sp, #272] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x13
-; CHECK-NEXT: ldr x13, [sp, #248] // 8-byte Reload
-; CHECK-NEXT: eor x2, x3, x2
-; CHECK-NEXT: eor x10, x10, x14
-; CHECK-NEXT: eor x11, x12, x11
-; CHECK-NEXT: ldr x12, [sp, #264] // 8-byte Reload
-; CHECK-NEXT: eor x10, x10, x13
-; CHECK-NEXT: and x13, x8, #0x800000000000000
-; CHECK-NEXT: mul x3, x0, x4
-; CHECK-NEXT: eor x12, x10, x12
-; CHECK-NEXT: ldr x10, [sp, #240] // 8-byte Reload
-; CHECK-NEXT: mul x18, x9, x13
-; CHECK-NEXT: ldr x13, [sp, #200] // 8-byte Reload
-; CHECK-NEXT: eor x12, x12, x10
-; CHECK-NEXT: ldr x10, [sp, #232] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldp x14, x12, [sp, #216] // 16-byte Folded Reload
-; CHECK-NEXT: eor x13, x10, x13
-; CHECK-NEXT: and x10, x8, #0x1000000000000000
-; CHECK-NEXT: mul x17, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #192] // 8-byte Reload
-; CHECK-NEXT: eor x12, x13, x12
-; CHECK-NEXT: ldr x13, [sp, #136] // 8-byte Reload
-; CHECK-NEXT: eor x18, x2, x18
-; CHECK-NEXT: eor x12, x12, x10
-; CHECK-NEXT: ldr x10, [sp, #184] // 8-byte Reload
-; CHECK-NEXT: and x2, x1, #0x4000000000000000
-; CHECK-NEXT: eor x12, x12, x14
-; CHECK-NEXT: ldr x14, [sp, #168] // 8-byte Reload
-; CHECK-NEXT: eor x13, x10, x13
-; CHECK-NEXT: and x10, x8, #0x2000000000000000
-; CHECK-NEXT: and x8, x8, #0x4000000000000000
-; CHECK-NEXT: mul x16, x9, x10
-; CHECK-NEXT: ldr x10, [sp, #208] // 8-byte Reload
-; CHECK-NEXT: eor x13, x13, x14
-; CHECK-NEXT: eor x17, x18, x17
-; CHECK-NEXT: eor x12, x12, x10
-; CHECK-NEXT: ldr x10, [sp, #160] // 8-byte Reload
-; CHECK-NEXT: mul x15, x9, x8
-; CHECK-NEXT: ldr x8, [sp, #152] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: eor x13, x13, x10
-; CHECK-NEXT: ldr x10, [sp, #176] // 8-byte Reload
-; CHECK-NEXT: mul x18, x0, x2
-; CHECK-NEXT: eor x16, x17, x16
-; CHECK-NEXT: and x17, x1, #0x8000000000000000
-; CHECK-NEXT: eor x12, x13, x10
-; CHECK-NEXT: and x13, x1, #0x10000000000000
-; CHECK-NEXT: eor x9, x12, x8
-; CHECK-NEXT: ldp x10, x8, [sp, #120] // 16-byte Folded Reload
-; CHECK-NEXT: mul x14, x0, x13
-; CHECK-NEXT: eor x12, x8, x10
-; CHECK-NEXT: ldr x8, [sp, #144] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x8
-; CHECK-NEXT: ldp x8, x10, [sp, #104] // 16-byte Folded Reload
-; CHECK-NEXT: eor x9, x11, x9
-; CHECK-NEXT: eor x14, x19, x14
-; CHECK-NEXT: eor x12, x12, x10
-; CHECK-NEXT: eor x11, x12, x8
-; CHECK-NEXT: ldr x8, [sp, #96] // 8-byte Reload
-; CHECK-NEXT: and x12, x1, #0x20000000000000
-; CHECK-NEXT: mul x13, x0, x12
-; CHECK-NEXT: eor x11, x11, x8
-; CHECK-NEXT: ldp x10, x8, [sp, #56] // 16-byte Folded Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: ldp x12, x10, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: eor x11, x11, x10
-; CHECK-NEXT: ldr x10, [sp, #48] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x12
-; CHECK-NEXT: ldr x12, [sp, #40] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: and x10, x1, #0x40000000000000
-; CHECK-NEXT: eor x8, x8, x12
-; CHECK-NEXT: mul x12, x0, x10
-; CHECK-NEXT: ldr x10, [sp, #72] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x10
-; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Reload
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: ldr x11, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: and x10, x1, #0x80000000000000
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: ldr x11, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: mul x10, x0, x10
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: eor x11, x29, x28
-; CHECK-NEXT: ldr x29, [sp, #8] // 8-byte Reload
-; CHECK-NEXT: eor x11, x11, x27
-; CHECK-NEXT: and x28, x1, #0x800000000000000
-; CHECK-NEXT: eor x8, x8, x29
-; CHECK-NEXT: eor x11, x11, x25
-; CHECK-NEXT: mul x27, x0, x28
-; CHECK-NEXT: eor x8, x8, x30
-; CHECK-NEXT: and x25, x1, #0x1000000000000000
-; CHECK-NEXT: eor x8, x9, x8
-; CHECK-NEXT: eor x9, x11, x24
-; CHECK-NEXT: mul x11, x0, x25
-; CHECK-NEXT: eor x9, x9, x23
-; CHECK-NEXT: eor x9, x9, x21
-; CHECK-NEXT: eor x9, x9, x20
-; CHECK-NEXT: eor x9, x9, x7
-; CHECK-NEXT: eor x9, x9, x5
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x16, x15
-; CHECK-NEXT: mul x15, x0, x17
-; CHECK-NEXT: eor x16, x6, x27
-; CHECK-NEXT: eor x8, x8, x9
-; CHECK-NEXT: eor x9, x14, x13
-; CHECK-NEXT: eor x11, x16, x11
-; CHECK-NEXT: rbit x8, x8
-; CHECK-NEXT: eor x9, x9, x12
-; CHECK-NEXT: eor x11, x11, x3
-; CHECK-NEXT: eor x13, x26, x22
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: eor x10, x11, x18
-; CHECK-NEXT: lsr x1, x8, #1
-; CHECK-NEXT: eor x8, x13, x9
-; CHECK-NEXT: eor x9, x10, x15
-; CHECK-NEXT: eor x0, x8, x9
-; CHECK-NEXT: add sp, sp, #624
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: clmul_i128_zext:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: sub sp, sp, #624
+; CHECK-NEON-NEXT: .cfi_def_cfa_offset 720
+; CHECK-NEON-NEXT: .cfi_offset w19, -8
+; CHECK-NEON-NEXT: .cfi_offset w20, -16
+; CHECK-NEON-NEXT: .cfi_offset w21, -24
+; CHECK-NEON-NEXT: .cfi_offset w22, -32
+; CHECK-NEON-NEXT: .cfi_offset w23, -40
+; CHECK-NEON-NEXT: .cfi_offset w24, -48
+; CHECK-NEON-NEXT: .cfi_offset w25, -56
+; CHECK-NEON-NEXT: .cfi_offset w26, -64
+; CHECK-NEON-NEXT: .cfi_offset w27, -72
+; CHECK-NEON-NEXT: .cfi_offset w28, -80
+; CHECK-NEON-NEXT: .cfi_offset w30, -88
+; CHECK-NEON-NEXT: .cfi_offset w29, -96
+; CHECK-NEON-NEXT: and x8, x1, #0x2
+; CHECK-NEON-NEXT: mul x11, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x1
+; CHECK-NEON-NEXT: mul x12, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x4
+; CHECK-NEON-NEXT: mul x13, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x8
+; CHECK-NEON-NEXT: mul x14, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x10
+; CHECK-NEON-NEXT: mul x15, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x20
+; CHECK-NEON-NEXT: mul x16, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x40
+; CHECK-NEON-NEXT: mul x17, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x80
+; CHECK-NEON-NEXT: mul x18, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x100
+; CHECK-NEON-NEXT: mul x3, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x200
+; CHECK-NEON-NEXT: mul x2, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x400
+; CHECK-NEON-NEXT: mul x4, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x800
+; CHECK-NEON-NEXT: mul x5, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x1000
+; CHECK-NEON-NEXT: mul x20, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x2000
+; CHECK-NEON-NEXT: mul x6, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x4000
+; CHECK-NEON-NEXT: mul x7, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x8000
+; CHECK-NEON-NEXT: mul x19, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x10000
+; CHECK-NEON-NEXT: mul x21, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x20000
+; CHECK-NEON-NEXT: mul x22, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x40000
+; CHECK-NEON-NEXT: mul x23, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x80000
+; CHECK-NEON-NEXT: mul x24, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x100000
+; CHECK-NEON-NEXT: mul x25, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x200000
+; CHECK-NEON-NEXT: mul x26, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x400000
+; CHECK-NEON-NEXT: mul x27, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x800000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #592] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x1000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #584] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x2000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #616] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x4000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #576] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x8000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #608] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x10000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #600] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x20000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #568] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x40000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #512] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x80000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #536] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x100000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #528] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x200000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #560] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x400000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #520] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x800000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #552] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x1000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #544] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x2000000000
+; CHECK-NEON-NEXT: mul x9, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x4000000000
+; CHECK-NEON-NEXT: mul x10, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x8000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #464] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x10000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #456] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x20000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #488] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x40000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: stp x10, x8, [sp, #440] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x80000000000
+; CHECK-NEON-NEXT: mul x9, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x100000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #472] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x200000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #504] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x400000000000
+; CHECK-NEON-NEXT: mul x9, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x800000000000
+; CHECK-NEON-NEXT: mul x10, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x1000000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #416] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x2000000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: str x8, [sp, #408] // 8-byte Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x4000000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #424] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x8000000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: stp x10, x8, [sp, #392] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x8, x1, #0x100000000000000
+; CHECK-NEON-NEXT: mul x9, x0, x8
+; CHECK-NEON-NEXT: and x8, x1, #0x200000000000000
+; CHECK-NEON-NEXT: mul x8, x0, x8
+; CHECK-NEON-NEXT: stp x8, x9, [sp, #376] // 16-byte Folded Spill
+; CHECK-NEON-NEXT: and x9, x1, #0x400000000000000
+; CHECK-NEON-NEXT: rbit x8, x1
+; CHECK-NEON-NEXT: mul x9, x0, x9
+; CHECK-NEON-NEXT: and x10, x8, #0x2
+; CHECK-NEON-NEXT: str x9, [sp, #368] // 8-byte Spill
+; CHECK-NEON-NEXT: rbit x9, x0
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #360] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #352] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #344] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #336] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #328] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #320] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #312] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #304] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #288] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #280] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #272] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #256] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #248] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #240] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #200] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #192] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #208] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #184] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #168] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #160] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #176] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #152] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #144] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x100000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x200000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x400000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x800000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #80] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x1000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #72] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x2000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x4000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x8000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x10000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #40] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x20000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #32] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x40000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT: and x10, x8, #0x80000000000
+; CHECK-NEON-NEXT: mul x10, x9, x10
+; CHECK-NEON-NEXT: str x10, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x10, x12, x11
+; CHECK-NEON-NEXT: and x12, x8, #0x100000000000
+; CHECK-NEON-NEXT: mul x12, x9, x12
+; CHECK-NEON-NEXT: eor x11, x13, x14
+; CHECK-NEON-NEXT: and x13, x8, #0x200000000000
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: eor x11, x15, x16
+; CHECK-NEON-NEXT: ldr x14, [sp, #608] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x17
+; CHECK-NEON-NEXT: mul x30, x9, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x400000000000
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: mul x29, x9, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x800000000000
+; CHECK-NEON-NEXT: str x12, [sp, #8] // 8-byte Spill
+; CHECK-NEON-NEXT: eor x12, x18, x3
+; CHECK-NEON-NEXT: eor x11, x12, x2
+; CHECK-NEON-NEXT: eor x12, x5, x20
+; CHECK-NEON-NEXT: mul x28, x9, x13
+; CHECK-NEON-NEXT: eor x11, x11, x4
+; CHECK-NEON-NEXT: ldr x13, [sp, #592] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: eor x11, x12, x6
+; CHECK-NEON-NEXT: eor x12, x21, x22
+; CHECK-NEON-NEXT: eor x11, x11, x7
+; CHECK-NEON-NEXT: eor x12, x12, x23
+; CHECK-NEON-NEXT: eor x13, x27, x13
+; CHECK-NEON-NEXT: eor x11, x11, x19
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: eor x11, x12, x24
+; CHECK-NEON-NEXT: and x12, x8, #0x1000000000000
+; CHECK-NEON-NEXT: eor x11, x11, x25
+; CHECK-NEON-NEXT: mul x27, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #584] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x26
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #616] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x13, x12
+; CHECK-NEON-NEXT: and x13, x8, #0x2000000000000
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: ldr x12, [sp, #576] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x25, x9, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #512] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #568] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #536] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x4000000000000
+; CHECK-NEON-NEXT: mul x24, x9, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #600] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #552] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #528] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #560] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x8000000000000
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: ldr x12, [sp, #520] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x23, x9, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #440] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #496] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #464] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x10000000000000
+; CHECK-NEON-NEXT: mul x21, x9, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #544] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #456] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x26, x10, x11
+; CHECK-NEON-NEXT: ldr x10, [sp, #488] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x11, [sp, #448] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x20000000000000
+; CHECK-NEON-NEXT: eor x10, x12, x10
+; CHECK-NEON-NEXT: mul x20, x9, x13
+; CHECK-NEON-NEXT: ldr x12, [sp, #392] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #432] // 8-byte Reload
+; CHECK-NEON-NEXT: ldr x13, [sp, #480] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: and x12, x8, #0x40000000000000
+; CHECK-NEON-NEXT: eor x10, x10, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #416] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x7, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #472] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #504] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #408] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x22, x10, x13
+; CHECK-NEON-NEXT: ldr x10, [sp, #424] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: and x12, x8, #0x80000000000000
+; CHECK-NEON-NEXT: eor x10, x11, x10
+; CHECK-NEON-NEXT: ldr x11, [sp, #400] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x5, x9, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #368] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x19, x10, x11
+; CHECK-NEON-NEXT: ldp x11, x10, [sp, #376] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: and x11, x8, #0x100000000000000
+; CHECK-NEON-NEXT: eor x6, x10, x12
+; CHECK-NEON-NEXT: ldp x12, x10, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x4, x9, x11
+; CHECK-NEON-NEXT: eor x10, x12, x10
+; CHECK-NEON-NEXT: ldp x12, x11, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldp x13, x12, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x10, x10, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x200000000000000
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: mul x3, x9, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x400000000000000
+; CHECK-NEON-NEXT: eor x12, x10, x11
+; CHECK-NEON-NEXT: ldp x11, x10, [sp, #296] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x2, x9, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x10, x11
+; CHECK-NEON-NEXT: ldr x10, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x3, x4, x3
+; CHECK-NEON-NEXT: and x4, x1, #0x2000000000000000
+; CHECK-NEON-NEXT: eor x11, x11, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #248] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x2, x3, x2
+; CHECK-NEON-NEXT: eor x10, x10, x14
+; CHECK-NEON-NEXT: eor x11, x12, x11
+; CHECK-NEON-NEXT: ldr x12, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x10, x10, x13
+; CHECK-NEON-NEXT: and x13, x8, #0x800000000000000
+; CHECK-NEON-NEXT: mul x3, x0, x4
+; CHECK-NEON-NEXT: eor x12, x10, x12
+; CHECK-NEON-NEXT: ldr x10, [sp, #240] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x18, x9, x13
+; CHECK-NEON-NEXT: ldr x13, [sp, #200] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x12, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #232] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldp x14, x12, [sp, #216] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x13, x10, x13
+; CHECK-NEON-NEXT: and x10, x8, #0x1000000000000000
+; CHECK-NEON-NEXT: mul x17, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #192] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x12, x13, x12
+; CHECK-NEON-NEXT: ldr x13, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x18, x2, x18
+; CHECK-NEON-NEXT: eor x12, x12, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #184] // 8-byte Reload
+; CHECK-NEON-NEXT: and x2, x1, #0x4000000000000000
+; CHECK-NEON-NEXT: eor x12, x12, x14
+; CHECK-NEON-NEXT: ldr x14, [sp, #168] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x10, x13
+; CHECK-NEON-NEXT: and x10, x8, #0x2000000000000000
+; CHECK-NEON-NEXT: and x8, x8, #0x4000000000000000
+; CHECK-NEON-NEXT: mul x16, x9, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #208] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x13, x13, x14
+; CHECK-NEON-NEXT: eor x17, x18, x17
+; CHECK-NEON-NEXT: eor x12, x12, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x15, x9, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: eor x13, x13, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #176] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x18, x0, x2
+; CHECK-NEON-NEXT: eor x16, x17, x16
+; CHECK-NEON-NEXT: and x17, x1, #0x8000000000000000
+; CHECK-NEON-NEXT: eor x12, x13, x10
+; CHECK-NEON-NEXT: and x13, x1, #0x10000000000000
+; CHECK-NEON-NEXT: eor x9, x12, x8
+; CHECK-NEON-NEXT: ldp x10, x8, [sp, #120] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: mul x14, x0, x13
+; CHECK-NEON-NEXT: eor x12, x8, x10
+; CHECK-NEON-NEXT: ldr x8, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x8
+; CHECK-NEON-NEXT: ldp x8, x10, [sp, #104] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x9, x11, x9
+; CHECK-NEON-NEXT: eor x14, x19, x14
+; CHECK-NEON-NEXT: eor x12, x12, x10
+; CHECK-NEON-NEXT: eor x11, x12, x8
+; CHECK-NEON-NEXT: ldr x8, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT: and x12, x1, #0x20000000000000
+; CHECK-NEON-NEXT: mul x13, x0, x12
+; CHECK-NEON-NEXT: eor x11, x11, x8
+; CHECK-NEON-NEXT: ldp x10, x8, [sp, #56] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: ldp x12, x10, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: eor x11, x11, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x12
+; CHECK-NEON-NEXT: ldr x12, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: and x10, x1, #0x40000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x12
+; CHECK-NEON-NEXT: mul x12, x0, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x10
+; CHECK-NEON-NEXT: ldr x10, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x9, x9, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x8, x8, x10
+; CHECK-NEON-NEXT: and x10, x1, #0x80000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: ldr x11, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT: mul x10, x0, x10
+; CHECK-NEON-NEXT: eor x8, x8, x11
+; CHECK-NEON-NEXT: eor x11, x29, x28
+; CHECK-NEON-NEXT: ldr x29, [sp, #8] // 8-byte Reload
+; CHECK-NEON-NEXT: eor x11, x11, x27
+; CHECK-NEON-NEXT: and x28, x1, #0x800000000000000
+; CHECK-NEON-NEXT: eor x8, x8, x29
+; CHECK-NEON-NEXT: eor x11, x11, x25
+; CHECK-NEON-NEXT: mul x27, x0, x28
+; CHECK-NEON-NEXT: eor x8, x8, x30
+; CHECK-NEON-NEXT: and x25, x1, #0x1000000000000000
+; CHECK-NEON-NEXT: eor x8, x9, x8
+; CHECK-NEON-NEXT: eor x9, x11, x24
+; CHECK-NEON-NEXT: mul x11, x0, x25
+; CHECK-NEON-NEXT: eor x9, x9, x23
+; CHECK-NEON-NEXT: eor x9, x9, x21
+; CHECK-NEON-NEXT: eor x9, x9, x20
+; CHECK-NEON-NEXT: eor x9, x9, x7
+; CHECK-NEON-NEXT: eor x9, x9, x5
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x16, x15
+; CHECK-NEON-NEXT: mul x15, x0, x17
+; CHECK-NEON-NEXT: eor x16, x6, x27
+; CHECK-NEON-NEXT: eor x8, x8, x9
+; CHECK-NEON-NEXT: eor x9, x14, x13
+; CHECK-NEON-NEXT: eor x11, x16, x11
+; CHECK-NEON-NEXT: rbit x8, x8
+; CHECK-NEON-NEXT: eor x9, x9, x12
+; CHECK-NEON-NEXT: eor x11, x11, x3
+; CHECK-NEON-NEXT: eor x13, x26, x22
+; CHECK-NEON-NEXT: eor x9, x9, x10
+; CHECK-NEON-NEXT: eor x10, x11, x18
+; CHECK-NEON-NEXT: lsr x1, x8, #1
+; CHECK-NEON-NEXT: eor x8, x13, x9
+; CHECK-NEON-NEXT: eor x9, x10, x15
+; CHECK-NEON-NEXT: eor x0, x8, x9
+; CHECK-NEON-NEXT: add sp, sp, #624
+; CHECK-NEON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-AES-LABEL: clmul_i128_zext:
+; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: rbit x8, x1
+; CHECK-AES-NEXT: rbit x9, x0
+; CHECK-AES-NEXT: fmov d2, x0
+; CHECK-AES-NEXT: fmov d0, x8
+; CHECK-AES-NEXT: fmov d1, x9
+; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-AES-NEXT: fmov d1, x1
+; CHECK-AES-NEXT: pmull v1.1q, v2.1d, v1.1d
+; CHECK-AES-NEXT: fmov x8, d0
+; CHECK-AES-NEXT: rbit x8, x8
+; CHECK-AES-NEXT: fmov x0, d1
+; CHECK-AES-NEXT: lsr x1, x8, #1
+; CHECK-AES-NEXT: ret
%zextx = zext i64 %x to i128
%zexty = zext i64 %y to i128
%a = call i128 @llvm.clmul.i128(i128 %zextx, i128 %zexty)
More information about the llvm-commits
mailing list