[llvm] 3b65752 - [AArch64] Enabled and regenerate clmul-fixed.ll. NFC (#184628)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 5 00:32:47 PST 2026


Author: David Green
Date: 2026-03-05T08:32:42Z
New Revision: 3b657524b69f49165377ff507f0d024633bfa37b

URL: https://github.com/llvm/llvm-project/commit/3b657524b69f49165377ff507f0d024633bfa37b
DIFF: https://github.com/llvm/llvm-project/commit/3b657524b69f49165377ff507f0d024633bfa37b.diff

LOG: [AArch64] Enabled and regenerate clmul-fixed.ll. NFC (#184628)

The v2i64 tests are now fixed. The disabled ones in clmul-scalable.ll
require i128 vectors, which are generally not supported.

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/clmul-fixed.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/clmul-fixed.ll b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
index 6dbc0b4a70f37..37b7a26a8bbfb 100644
--- a/llvm/test/CodeGen/AArch64/clmul-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
@@ -432,16 +432,1229 @@ define <2 x i32> @clmul_v2i32_neon(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %a
 }
 
-; TODO: Fix
-; define <2 x i64> @clmul_v2i64_neon(<2 x i64> %x, <2 x i64> %y) {
-;   %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
-;   ret <2 x i64> %a
-; }
-; TODO: Fix
-; define <1 x i64> @clmul_v1i64_neon(<1 x i64> %x, <1 x i64> %y) {
-;   %a = call <1 x i64> @llvm.clmul.v1i64(<1 x i64> %x, <1 x i64> %y)
-;   ret <1 x i64> %a
-; }
+define <2 x i64> @clmul_v2i64_neon(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-NEON-LABEL: clmul_v2i64_neon:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    sub sp, sp, #416
+; CHECK-NEON-NEXT:    stp d13, d12, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp d11, d10, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp d9, d8, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x29, x30, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x28, x27, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x26, x25, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x24, x23, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x22, x21, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x20, x19, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    .cfi_def_cfa_offset 416
+; CHECK-NEON-NEXT:    .cfi_offset w19, -8
+; CHECK-NEON-NEXT:    .cfi_offset w20, -16
+; CHECK-NEON-NEXT:    .cfi_offset w21, -24
+; CHECK-NEON-NEXT:    .cfi_offset w22, -32
+; CHECK-NEON-NEXT:    .cfi_offset w23, -40
+; CHECK-NEON-NEXT:    .cfi_offset w24, -48
+; CHECK-NEON-NEXT:    .cfi_offset w25, -56
+; CHECK-NEON-NEXT:    .cfi_offset w26, -64
+; CHECK-NEON-NEXT:    .cfi_offset w27, -72
+; CHECK-NEON-NEXT:    .cfi_offset w28, -80
+; CHECK-NEON-NEXT:    .cfi_offset w30, -88
+; CHECK-NEON-NEXT:    .cfi_offset w29, -96
+; CHECK-NEON-NEXT:    .cfi_offset b8, -104
+; CHECK-NEON-NEXT:    .cfi_offset b9, -112
+; CHECK-NEON-NEXT:    .cfi_offset b10, -120
+; CHECK-NEON-NEXT:    .cfi_offset b11, -128
+; CHECK-NEON-NEXT:    .cfi_offset b12, -136
+; CHECK-NEON-NEXT:    .cfi_offset b13, -144
+; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEON-NEXT:    fmov x9, d0
+; CHECK-NEON-NEXT:    mov w10, #8 // =0x8
+; CHECK-NEON-NEXT:    dup v2.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEON-NEXT:    mov w14, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    dup v3.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #4 // =0x4
+; CHECK-NEON-NEXT:    mov x3, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    dup v4.2d, x8
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    and v4.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov x8, d2
+; CHECK-NEON-NEXT:    mov x12, v2.d[1]
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    mov x13, v4.d[1]
+; CHECK-NEON-NEXT:    mul x16, x9, x8
+; CHECK-NEON-NEXT:    mov x8, v0.d[1]
+; CHECK-NEON-NEXT:    dup v0.2d, x10
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    mul x23, x9, x11
+; CHECK-NEON-NEXT:    mov x11, v3.d[1]
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    mul x5, x9, x10
+; CHECK-NEON-NEXT:    mov w10, #16 // =0x10
+; CHECK-NEON-NEXT:    dup v2.2d, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    fmov d5, x23
+; CHECK-NEON-NEXT:    mov x23, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x27, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #64 // =0x40
+; CHECK-NEON-NEXT:    fmov d16, x5
+; CHECK-NEON-NEXT:    mov x5, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #248] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov w11, #32 // =0x20
+; CHECK-NEON-NEXT:    dup v3.2d, x11
+; CHECK-NEON-NEXT:    mul x11, x8, x13
+; CHECK-NEON-NEXT:    mov w13, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    stp x10, x11, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #128 // =0x80
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    mul x29, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #256 // =0x100
+; CHECK-NEON-NEXT:    ldr d19, [sp, #232] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #240] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #208] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #512 // =0x200
+; CHECK-NEON-NEXT:    ldr d17, [sp, #208] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mov v17.d[1], x29
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #184] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #176] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #2048 // =0x800
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x22, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #4096 // =0x1000
+; CHECK-NEON-NEXT:    str x10, [sp, #192] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    fmov d22, x22
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #160] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #8192 // =0x2000
+; CHECK-NEON-NEXT:    ldr d28, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #200] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #16384 // =0x4000
+; CHECK-NEON-NEXT:    ldr d23, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #168] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #32768 // =0x8000
+; CHECK-NEON-NEXT:    ldr d7, [sp, #112] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x28, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    str x10, [sp, #144] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mul x18, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    mov w12, #65536 // =0x10000
+; CHECK-NEON-NEXT:    fmov d27, x28
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    fmov d30, x18
+; CHECK-NEON-NEXT:    str x10, [sp, #152] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #131072 // =0x20000
+; CHECK-NEON-NEXT:    str x11, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    ldr d25, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #262144 // =0x40000
+; CHECK-NEON-NEXT:    str x11, [sp, #72] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    ldr d21, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x6, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    str x10, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    dup v0.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #524288 // =0x80000
+; CHECK-NEON-NEXT:    mul x26, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    fmov d24, x26
+; CHECK-NEON-NEXT:    str x10, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    str x11, [sp, #40] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    ldr d29, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #80] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #2097152 // =0x200000
+; CHECK-NEON-NEXT:    str x11, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    ldr d26, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x25, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    str x10, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    dup v0.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    mul x19, x9, x11
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    str x10, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    mul x0, x9, x11
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    str x10, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #16777216 // =0x1000000
+; CHECK-NEON-NEXT:    mul x2, x9, x11
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    str x10, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    dup v0.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    mul x1, x9, x11
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    mul x30, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x15, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    str x10, [sp, #8] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    mul x17, x9, x11
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x20, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v0.d[1]
+; CHECK-NEON-NEXT:    dup v0.2d, x12
+; CHECK-NEON-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    mul x4, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x13
+; CHECK-NEON-NEXT:    mov x11, v0.d[1]
+; CHECK-NEON-NEXT:    fmov x13, d0
+; CHECK-NEON-NEXT:    dup v0.2d, x14
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x24, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    fmov d0, x16
+; CHECK-NEON-NEXT:    mul x13, x9, x13
+; CHECK-NEON-NEXT:    mul x7, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    mov v0.d[1], x27
+; CHECK-NEON-NEXT:    dup v4.2d, x10
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    mul x12, x9, x10
+; CHECK-NEON-NEXT:    movi v4.4s, #128, lsl #24
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov x11, v2.d[1]
+; CHECK-NEON-NEXT:    mov x14, v3.d[1]
+; CHECK-NEON-NEXT:    fmov x16, d3
+; CHECK-NEON-NEXT:    dup v3.2d, x3
+; CHECK-NEON-NEXT:    fneg v4.2d, v4.2d
+; CHECK-NEON-NEXT:    mul x21, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    str x10, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT:    mul x10, x8, x14
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    mov x14, v6.d[1]
+; CHECK-NEON-NEXT:    and v4.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x5
+; CHECK-NEON-NEXT:    mul x3, x9, x16
+; CHECK-NEON-NEXT:    mov x16, v2.d[1]
+; CHECK-NEON-NEXT:    fmov x5, d2
+; CHECK-NEON-NEXT:    dup v2.2d, x23
+; CHECK-NEON-NEXT:    mul x27, x8, x14
+; CHECK-NEON-NEXT:    fmov x14, d6
+; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    str x10, [sp, #32] // 8-byte Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #248] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x23, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    ldp d3, d18, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    mov v5.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x22, v6.d[1]
+; CHECK-NEON-NEXT:    mul x11, x9, x11
+; CHECK-NEON-NEXT:    mov v16.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT:    fmov x18, d2
+; CHECK-NEON-NEXT:    mul x14, x9, x14
+; CHECK-NEON-NEXT:    mov v19.d[1], x10
+; CHECK-NEON-NEXT:    eor v13.16b, v5.16b, v0.16b
+; CHECK-NEON-NEXT:    fmov d0, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x16
+; CHECK-NEON-NEXT:    mov x16, v4.d[1]
+; CHECK-NEON-NEXT:    mov x12, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    fmov d5, x17
+; CHECK-NEON-NEXT:    mov v0.d[1], x7
+; CHECK-NEON-NEXT:    mul x5, x9, x5
+; CHECK-NEON-NEXT:    eor v16.16b, v16.16b, v19.16b
+; CHECK-NEON-NEXT:    dup v19.2d, x12
+; CHECK-NEON-NEXT:    mov v5.d[1], x24
+; CHECK-NEON-NEXT:    mul x18, x9, x18
+; CHECK-NEON-NEXT:    str x10, [sp, #256] // 8-byte Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #240] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v19.16b
+; CHECK-NEON-NEXT:    mov v18.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #216] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v3.d[1], x10
+; CHECK-NEON-NEXT:    mul x10, x8, x16
+; CHECK-NEON-NEXT:    fmov x16, d4
+; CHECK-NEON-NEXT:    dup v4.2d, x23
+; CHECK-NEON-NEXT:    fmov x23, d6
+; CHECK-NEON-NEXT:    mul x16, x9, x16
+; CHECK-NEON-NEXT:    and v4.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    str x10, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #224] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x28, x9, x23
+; CHECK-NEON-NEXT:    mov v22.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #192] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v28.d[1], x10
+; CHECK-NEON-NEXT:    mul x10, x8, x22
+; CHECK-NEON-NEXT:    mov x22, v2.d[1]
+; CHECK-NEON-NEXT:    str x10, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    dup v6.2d, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #200] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v23.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #168] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v20.16b, v1.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x6
+; CHECK-NEON-NEXT:    fmov x6, d4
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v27.d[1], x10
+; CHECK-NEON-NEXT:    mul x10, x8, x22
+; CHECK-NEON-NEXT:    mov x22, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    dup v2.2d, x22
+; CHECK-NEON-NEXT:    ldr x22, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x26, x9, x6
+; CHECK-NEON-NEXT:    ldr x6, [sp, #128] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v30.d[1], x22
+; CHECK-NEON-NEXT:    ldr x22, [sp, #120] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v21.d[1], x6
+; CHECK-NEON-NEXT:    ldr x6, [sp, #104] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    str x10, [sp, #248] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v4.d[1]
+; CHECK-NEON-NEXT:    mov v25.d[1], x22
+; CHECK-NEON-NEXT:    mov v6.d[1], x6
+; CHECK-NEON-NEXT:    ldr x6, [sp, #80] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x22, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    dup v4.2d, x22
+; CHECK-NEON-NEXT:    mov x22, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    mov v24.d[1], x6
+; CHECK-NEON-NEXT:    fmov x6, d20
+; CHECK-NEON-NEXT:    dup v8.2d, x22
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v31.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov d4, x19
+; CHECK-NEON-NEXT:    mov x19, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    mul x22, x9, x6
+; CHECK-NEON-NEXT:    ldr x6, [sp, #88] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v10.16b, v1.16b, v8.16b
+; CHECK-NEON-NEXT:    dup v9.2d, x19
+; CHECK-NEON-NEXT:    fmov d8, x2
+; CHECK-NEON-NEXT:    mov v29.d[1], x6
+; CHECK-NEON-NEXT:    ldr x6, [sp, #56] // 8-byte Reload
+; CHECK-NEON-NEXT:    fmov x2, d31
+; CHECK-NEON-NEXT:    str x10, [sp, #240] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v20.d[1]
+; CHECK-NEON-NEXT:    fmov d20, x25
+; CHECK-NEON-NEXT:    mov v26.d[1], x6
+; CHECK-NEON-NEXT:    fmov x6, d2
+; CHECK-NEON-NEXT:    and v11.16b, v1.16b, v9.16b
+; CHECK-NEON-NEXT:    fmov d9, x1
+; CHECK-NEON-NEXT:    mul x1, x9, x2
+; CHECK-NEON-NEXT:    ldr x2, [sp, #8] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mov v8.d[1], x2
+; CHECK-NEON-NEXT:    mov x2, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    fmov x12, d11
+; CHECK-NEON-NEXT:    mov v9.d[1], x20
+; CHECK-NEON-NEXT:    mul x12, x9, x12
+; CHECK-NEON-NEXT:    str x10, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    fmov d2, x0
+; CHECK-NEON-NEXT:    mul x0, x9, x6
+; CHECK-NEON-NEXT:    ldr x6, [sp, #64] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v20.d[1], x6
+; CHECK-NEON-NEXT:    ldr x6, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v2.d[1], x30
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mov v4.d[1], x6
+; CHECK-NEON-NEXT:    mov x6, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    dup v12.2d, x6
+; CHECK-NEON-NEXT:    str x10, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v31.d[1]
+; CHECK-NEON-NEXT:    fmov d31, x15
+; CHECK-NEON-NEXT:    mov x15, v11.d[1]
+; CHECK-NEON-NEXT:    mov v31.d[1], x4
+; CHECK-NEON-NEXT:    mov x4, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    mul x29, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v10.d[1]
+; CHECK-NEON-NEXT:    mul x6, x8, x15
+; CHECK-NEON-NEXT:    mul x25, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d10
+; CHECK-NEON-NEXT:    and v10.16b, v1.16b, v12.16b
+; CHECK-NEON-NEXT:    dup v12.2d, x2
+; CHECK-NEON-NEXT:    mul x2, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v10.d[1]
+; CHECK-NEON-NEXT:    and v11.16b, v1.16b, v12.16b
+; CHECK-NEON-NEXT:    eor v12.16b, v13.16b, v16.16b
+; CHECK-NEON-NEXT:    eor v16.16b, v17.16b, v18.16b
+; CHECK-NEON-NEXT:    eor v17.16b, v22.16b, v28.16b
+; CHECK-NEON-NEXT:    dup v18.2d, x4
+; CHECK-NEON-NEXT:    mov x4, #70368744177664 // =0x400000000000
+; CHECK-NEON-NEXT:    eor v22.16b, v16.16b, v3.16b
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v23.16b
+; CHECK-NEON-NEXT:    eor v23.16b, v27.16b, v30.16b
+; CHECK-NEON-NEXT:    dup v27.2d, x11
+; CHECK-NEON-NEXT:    fmov x11, d11
+; CHECK-NEON-NEXT:    mul x17, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d10
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    and v18.16b, v1.16b, v18.16b
+; CHECK-NEON-NEXT:    eor v22.16b, v12.16b, v22.16b
+; CHECK-NEON-NEXT:    eor v7.16b, v17.16b, v7.16b
+; CHECK-NEON-NEXT:    eor v17.16b, v23.16b, v25.16b
+; CHECK-NEON-NEXT:    dup v23.2d, x4
+; CHECK-NEON-NEXT:    mul x20, x9, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v3.d[1], x21
+; CHECK-NEON-NEXT:    mov x21, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    mov x15, v11.d[1]
+; CHECK-NEON-NEXT:    mul x7, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v19.d[1]
+; CHECK-NEON-NEXT:    mov v16.d[1], x11
+; CHECK-NEON-NEXT:    mov x11, v18.d[1]
+; CHECK-NEON-NEXT:    eor v22.16b, v22.16b, v7.16b
+; CHECK-NEON-NEXT:    fmov d7, x3
+; CHECK-NEON-NEXT:    mov x3, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v21.16b
+; CHECK-NEON-NEXT:    eor v21.16b, v24.16b, v29.16b
+; CHECK-NEON-NEXT:    dup v25.2d, x3
+; CHECK-NEON-NEXT:    fmov d24, x14
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    ldp d11, d10, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d19
+; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v27.16b
+; CHECK-NEON-NEXT:    eor v6.16b, v17.16b, v6.16b
+; CHECK-NEON-NEXT:    eor v17.16b, v21.16b, v26.16b
+; CHECK-NEON-NEXT:    eor v21.16b, v8.16b, v9.16b
+; CHECK-NEON-NEXT:    mul x23, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d18
+; CHECK-NEON-NEXT:    and v18.16b, v1.16b, v23.16b
+; CHECK-NEON-NEXT:    mov v24.d[1], x27
+; CHECK-NEON-NEXT:    mov x27, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    dup v23.2d, x21
+; CHECK-NEON-NEXT:    mul x4, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v19.d[1]
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v20.16b
+; CHECK-NEON-NEXT:    eor v20.16b, v21.16b, v31.16b
+; CHECK-NEON-NEXT:    fmov d21, x5
+; CHECK-NEON-NEXT:    eor v6.16b, v22.16b, v6.16b
+; CHECK-NEON-NEXT:    mul x24, x9, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT:    fmov d22, x16
+; CHECK-NEON-NEXT:    mov x5, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    eor v4.16b, v17.16b, v4.16b
+; CHECK-NEON-NEXT:    dup v17.2d, x27
+; CHECK-NEON-NEXT:    mov v7.d[1], x11
+; CHECK-NEON-NEXT:    mov x11, v18.d[1]
+; CHECK-NEON-NEXT:    mul x19, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d19
+; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v25.16b
+; CHECK-NEON-NEXT:    eor v5.16b, v20.16b, v5.16b
+; CHECK-NEON-NEXT:    dup v25.2d, x5
+; CHECK-NEON-NEXT:    eor v2.16b, v4.16b, v2.16b
+; CHECK-NEON-NEXT:    mov x27, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT:    dup v20.2d, x27
+; CHECK-NEON-NEXT:    and v17.16b, v1.16b, v17.16b
+; CHECK-NEON-NEXT:    mul x3, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d18
+; CHECK-NEON-NEXT:    eor v7.16b, v7.16b, v24.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v5.16b, v0.16b
+; CHECK-NEON-NEXT:    fmov d5, x28
+; CHECK-NEON-NEXT:    and v18.16b, v1.16b, v23.16b
+; CHECK-NEON-NEXT:    mul x30, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v19.d[1]
+; CHECK-NEON-NEXT:    eor v2.16b, v6.16b, v2.16b
+; CHECK-NEON-NEXT:    fmov d6, x26
+; CHECK-NEON-NEXT:    ldp d9, d8, [sp, #304] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x16, x9, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov x27, d18
+; CHECK-NEON-NEXT:    and v16.16b, v1.16b, v20.16b
+; CHECK-NEON-NEXT:    mov x21, v18.d[1]
+; CHECK-NEON-NEXT:    mov v21.d[1], x11
+; CHECK-NEON-NEXT:    mul x5, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d19
+; CHECK-NEON-NEXT:    ldr x11, [sp, #232] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v25.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x28, x9, x27
+; CHECK-NEON-NEXT:    ldp d13, d12, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mov v22.d[1], x11
+; CHECK-NEON-NEXT:    mov x11, v17.d[1]
+; CHECK-NEON-NEXT:    mul x14, x9, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v4.16b, v7.16b, v21.16b
+; CHECK-NEON-NEXT:    fmov d7, x18
+; CHECK-NEON-NEXT:    mov x18, v19.d[1]
+; CHECK-NEON-NEXT:    eor v0.16b, v2.16b, v0.16b
+; CHECK-NEON-NEXT:    mov v5.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #248] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x21, x8, x21
+; CHECK-NEON-NEXT:    eor v4.16b, v4.16b, v22.16b
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    fmov x10, d19
+; CHECK-NEON-NEXT:    mul x27, x8, x18
+; CHECK-NEON-NEXT:    mov x18, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    eor v4.16b, v4.16b, v5.16b
+; CHECK-NEON-NEXT:    fmov d5, x22
+; CHECK-NEON-NEXT:    mov x22, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    mul x26, x9, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #240] // 8-byte Reload
+; CHECK-NEON-NEXT:    dup v3.2d, x18
+; CHECK-NEON-NEXT:    mov v6.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #216] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x18, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d17
+; CHECK-NEON-NEXT:    dup v17.2d, x22
+; CHECK-NEON-NEXT:    eor v2.16b, v4.16b, v7.16b
+; CHECK-NEON-NEXT:    mov v5.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, v16.d[1]
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    fmov d4, x0
+; CHECK-NEON-NEXT:    fmov d7, x1
+; CHECK-NEON-NEXT:    mul x22, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d16
+; CHECK-NEON-NEXT:    and v17.16b, v1.16b, v17.16b
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v6.16b
+; CHECK-NEON-NEXT:    mov x1, v3.d[1]
+; CHECK-NEON-NEXT:    fmov d16, x2
+; CHECK-NEON-NEXT:    mul x0, x8, x10
+; CHECK-NEON-NEXT:    mov x10, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    mov v7.d[1], x29
+; CHECK-NEON-NEXT:    dup v6.2d, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #224] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x2, x9, x11
+; CHECK-NEON-NEXT:    mov x11, v17.d[1]
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v5.16b
+; CHECK-NEON-NEXT:    mov v4.d[1], x10
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    mov x12, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    mov v16.d[1], x25
+; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v6.16b
+; CHECK-NEON-NEXT:    dup v5.2d, x12
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v7.16b
+; CHECK-NEON-NEXT:    mul x1, x8, x1
+; CHECK-NEON-NEXT:    mov v3.d[1], x6
+; CHECK-NEON-NEXT:    mul x12, x9, x10
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov d4, x7
+; CHECK-NEON-NEXT:    fmov x10, d17
+; CHECK-NEON-NEXT:    and v5.16b, v1.16b, v5.16b
+; CHECK-NEON-NEXT:    mov x25, v6.d[1]
+; CHECK-NEON-NEXT:    mul x6, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    fmov d17, x20
+; CHECK-NEON-NEXT:    dup v7.2d, x11
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    mov v4.d[1], x17
+; CHECK-NEON-NEXT:    mul x7, x9, x10
+; CHECK-NEON-NEXT:    fmov d16, x4
+; CHECK-NEON-NEXT:    mov x10, v5.d[1]
+; CHECK-NEON-NEXT:    mov x17, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    fmov x11, d6
+; CHECK-NEON-NEXT:    mov x4, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v7.16b
+; CHECK-NEON-NEXT:    dup v7.2d, x17
+; CHECK-NEON-NEXT:    mov v17.d[1], x15
+; CHECK-NEON-NEXT:    mov v16.d[1], x13
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    fmov d3, x24
+; CHECK-NEON-NEXT:    and v5.16b, v1.16b, v7.16b
+; CHECK-NEON-NEXT:    dup v7.2d, x4
+; CHECK-NEON-NEXT:    fmov d4, x30
+; CHECK-NEON-NEXT:    mul x17, x9, x11
+; CHECK-NEON-NEXT:    mov x11, v6.d[1]
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v17.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov d16, x16
+; CHECK-NEON-NEXT:    mov v3.d[1], x23
+; CHECK-NEON-NEXT:    mul x4, x9, x10
+; CHECK-NEON-NEXT:    mov x16, v5.d[1]
+; CHECK-NEON-NEXT:    fmov x10, d6
+; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v7.16b
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mov v4.d[1], x19
+; CHECK-NEON-NEXT:    mov v16.d[1], x3
+; CHECK-NEON-NEXT:    mov x3, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    mov x19, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v17.2d, x19
+; CHECK-NEON-NEXT:    mov x20, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    mov v7.d[1], x5
+; CHECK-NEON-NEXT:    mov x5, v6.d[1]
+; CHECK-NEON-NEXT:    mul x14, x8, x16
+; CHECK-NEON-NEXT:    fmov x16, d5
+; CHECK-NEON-NEXT:    fmov d5, x28
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    dup v4.2d, x3
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v16.16b
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v17.16b
+; CHECK-NEON-NEXT:    movi v17.2d, #0000000000000000
+; CHECK-NEON-NEXT:    dup v16.2d, x20
+; CHECK-NEON-NEXT:    mul x15, x8, x25
+; CHECK-NEON-NEXT:    mov v5.d[1], x21
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    fmov d7, x26
+; CHECK-NEON-NEXT:    mul x3, x9, x16
+; CHECK-NEON-NEXT:    mov x19, v3.d[1]
+; CHECK-NEON-NEXT:    and v4.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov x20, d3
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v16.16b
+; CHECK-NEON-NEXT:    mul x16, x8, x5
+; CHECK-NEON-NEXT:    fmov x5, d6
+; CHECK-NEON-NEXT:    fmov d6, x22
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v5.16b
+; CHECK-NEON-NEXT:    mov v7.d[1], x27
+; CHECK-NEON-NEXT:    fmov v5.2d, #2.00000000
+; CHECK-NEON-NEXT:    fmov x21, d3
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov v6.d[1], x18
+; CHECK-NEON-NEXT:    mov x18, v4.d[1]
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    ldp x24, x23, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    fneg v7.2d, v17.2d
+; CHECK-NEON-NEXT:    mul x5, x9, x5
+; CHECK-NEON-NEXT:    ldp x26, x25, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x2
+; CHECK-NEON-NEXT:    fmov x2, d4
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mul x20, x9, x20
+; CHECK-NEON-NEXT:    ldp x28, x27, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mov v6.d[1], x0
+; CHECK-NEON-NEXT:    mov x0, v3.d[1]
+; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v5.16b
+; CHECK-NEON-NEXT:    fmov d5, x7
+; CHECK-NEON-NEXT:    and v1.16b, v1.16b, v7.16b
+; CHECK-NEON-NEXT:    mov v4.d[1], x1
+; CHECK-NEON-NEXT:    mul x2, x9, x2
+; CHECK-NEON-NEXT:    ldp x29, x30, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mov x1, v3.d[1]
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x4
+; CHECK-NEON-NEXT:    mov v5.d[1], x6
+; CHECK-NEON-NEXT:    fmov x6, d3
+; CHECK-NEON-NEXT:    fmov d3, x17
+; CHECK-NEON-NEXT:    mul x12, x9, x21
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov d4, x3
+; CHECK-NEON-NEXT:    mov v6.d[1], x13
+; CHECK-NEON-NEXT:    fmov x13, d1
+; CHECK-NEON-NEXT:    mul x19, x8, x19
+; CHECK-NEON-NEXT:    mov v3.d[1], x15
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v5.16b
+; CHECK-NEON-NEXT:    fmov d5, x20
+; CHECK-NEON-NEXT:    mul x17, x9, x6
+; CHECK-NEON-NEXT:    mov v4.d[1], x14
+; CHECK-NEON-NEXT:    ldp x22, x21, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x9, x9, x13
+; CHECK-NEON-NEXT:    mov x13, v1.d[1]
+; CHECK-NEON-NEXT:    fmov d1, x10
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v3.16b
+; CHECK-NEON-NEXT:    fmov d3, x5
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    mul x18, x8, x18
+; CHECK-NEON-NEXT:    mov v5.d[1], x19
+; CHECK-NEON-NEXT:    mov v1.d[1], x11
+; CHECK-NEON-NEXT:    ldp x20, x19, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x10, x8, x0
+; CHECK-NEON-NEXT:    mov v3.d[1], x16
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mul x11, x8, x1
+; CHECK-NEON-NEXT:    eor v1.16b, v2.16b, v1.16b
+; CHECK-NEON-NEXT:    fmov d2, x2
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v5.16b
+; CHECK-NEON-NEXT:    mul x8, x8, x13
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov v4.d[1], x10
+; CHECK-NEON-NEXT:    mov v2.d[1], x18
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    fmov d3, x17
+; CHECK-NEON-NEXT:    mov v3.d[1], x11
+; CHECK-NEON-NEXT:    mov v5.d[1], x8
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v3.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v5.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    add sp, sp, #416
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-AES-LABEL: clmul_v2i64_neon:
+; CHECK-AES:       // %bb.0:
+; CHECK-AES-NEXT:    pmull2 v2.1q, v0.2d, v1.2d
+; CHECK-AES-NEXT:    pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-AES-NEXT:    ret
+  %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
+  ret <2 x i64> %a
+}
+
+define <1 x i64> @clmul_v1i64_neon(<1 x i64> %x, <1 x i64> %y) {
+; CHECK-NEON-LABEL: clmul_v1i64_neon:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEON-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEON-NEXT:    mov w9, #4 // =0x4
+; CHECK-NEON-NEXT:    mov w10, #8 // =0x8
+; CHECK-NEON-NEXT:    fmov d2, x8
+; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEON-NEXT:    fmov d3, x8
+; CHECK-NEON-NEXT:    fmov x8, d0
+; CHECK-NEON-NEXT:    fmov d0, x9
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    mov w10, #16 // =0x10
+; CHECK-NEON-NEXT:    mul x14, x8, x9
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x15, x8, x9
+; CHECK-NEON-NEXT:    fmov x9, d0
+; CHECK-NEON-NEXT:    fmov d0, x10
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d3, x14
+; CHECK-NEON-NEXT:    mul x12, x8, x9
+; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    mov w9, #32 // =0x20
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    mov w9, #64 // =0x40
+; CHECK-NEON-NEXT:    mul x11, x8, x10
+; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    fmov x10, d0
+; CHECK-NEON-NEXT:    fmov d0, x9
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    mov w10, #128 // =0x80
+; CHECK-NEON-NEXT:    fmov x13, d2
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    mov w10, #256 // =0x100
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    fmov x14, d0
+; CHECK-NEON-NEXT:    fmov d0, x10
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    mul x10, x8, x14
+; CHECK-NEON-NEXT:    mov w14, #512 // =0x200
+; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    fmov x15, d2
+; CHECK-NEON-NEXT:    fmov d2, x14
+; CHECK-NEON-NEXT:    fmov x12, d0
+; CHECK-NEON-NEXT:    mul x14, x8, x15
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov w15, #1024 // =0x400
+; CHECK-NEON-NEXT:    fmov d0, x15
+; CHECK-NEON-NEXT:    mov w15, #2048 // =0x800
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    mov w10, #16384 // =0x4000
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x15
+; CHECK-NEON-NEXT:    mov w15, #4096 // =0x1000
+; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov x15, d0
+; CHECK-NEON-NEXT:    eor v0.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov w13, #8192 // =0x2000
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    eor v0.8b, v3.8b, v0.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v7.8b
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mov w9, #32768 // =0x8000
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #65536 // =0x10000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mov w10, #131072 // =0x20000
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #262144 // =0x40000
+; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
+; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov w11, #524288 // =0x80000
+; CHECK-NEON-NEXT:    fmov x12, d6
+; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mov w12, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d7, x9
+; CHECK-NEON-NEXT:    fmov x9, d5
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov d16, x11
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x9
+; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    fmov d16, x10
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #16777216 // =0x1000000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mov w11, #2097152 // =0x200000
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT:    movi v16.2s, #128, lsl #24
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v7.8b, v3.8b
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    mul x12, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mov w11, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    mul x10, x8, x14
+; CHECK-NEON-NEXT:    mov w14, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    fmov x13, d6
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT:    fneg d7, d16
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mov w12, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    mov w13, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x13, x8, x14
+; CHECK-NEON-NEXT:    mov w14, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x15, d6
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    mul x14, x8, x15
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v7.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d7, x9
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mov x14, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    mov x11, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mov x9, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x12, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov x12, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mov x12, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    fmov x13, d2
+; CHECK-NEON-NEXT:    fmov d2, x12
+; CHECK-NEON-NEXT:    mov x14, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov x13, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    mov x11, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mov x10, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #70368744177664 // =0x400000000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov x12, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov x12, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mov x12, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov x13, d2
+; CHECK-NEON-NEXT:    fmov d2, x12
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x13
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mov x12, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x12, d2
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mov x11, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mul x9, x8, x12
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    mov x11, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov x9, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov x11, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x10
+; CHECK-NEON-NEXT:    mov x10, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mov x11, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov x13, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mov x14, #4611686018427387904 // =0x4000000000000000
+; CHECK-NEON-NEXT:    fmov x13, d2
+; CHECK-NEON-NEXT:    movi d2, #0000000000000000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x13
+; CHECK-NEON-NEXT:    fmov x13, d4
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    fneg d2, d2
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x14
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    and v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d2, x12
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    fmov x10, d1
+; CHECK-NEON-NEXT:    fmov d1, x13
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v2.8b, v1.8b
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    mul x8, x8, x10
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d2, x12
+; CHECK-NEON-NEXT:    fmov d3, x8
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-AES-LABEL: clmul_v1i64_neon:
+; CHECK-AES:       // %bb.0:
+; CHECK-AES-NEXT:    pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-AES-NEXT:    ret
+  %a = call <1 x i64> @llvm.clmul.v1i64(<1 x i64> %x, <1 x i64> %y)
+  ret <1 x i64> %a
+}
 
 define <1 x i128> @clmul_v1i128_neon(<1 x i128> %x, <1 x i128> %y) {
 ; CHECK-NEON-LABEL: clmul_v1i128_neon:
@@ -5126,25 +6339,1247 @@ define <2 x i32> @clmulr_v2i32_neon(<2 x i32> %a, <2 x i32> %b) nounwind {
   ret <2 x i32> %res
 }
 
-; TODO
-;define <2 x i64> @clmulr_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
-;  %a.ext = zext <2 x i64> %a to <2 x i128>
-;  %b.ext = zext <2 x i64> %b to <2 x i128>
-;  %clmul = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a.ext, <2 x i128> %b.ext)
-;  %res.ext = lshr <2 x i128> %clmul, splat (i128 63)
-;  %res = trunc <2 x i128> %res.ext to <2 x i64>
-;  ret <2 x i64> %res
-;}
+define <2 x i64> @clmulr_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
+; CHECK-NEON-LABEL: clmulr_v2i64_neon:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    sub sp, sp, #464
+; CHECK-NEON-NEXT:    rev64 v1.16b, v1.16b
+; CHECK-NEON-NEXT:    rev64 v2.16b, v0.16b
+; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEON-NEXT:    stp x24, x23, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    mov x2, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    stp x28, x27, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x26, x25, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    rbit v0.16b, v1.16b
+; CHECK-NEON-NEXT:    dup v1.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEON-NEXT:    dup v3.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #4 // =0x4
+; CHECK-NEON-NEXT:    rbit v2.16b, v2.16b
+; CHECK-NEON-NEXT:    dup v4.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEON-NEXT:    stp x29, x30, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    dup v5.2d, x8
+; CHECK-NEON-NEXT:    stp x22, x21, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    stp x20, x19, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    and v4.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov x8, d2
+; CHECK-NEON-NEXT:    stp d11, d10, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    and v5.16b, v0.16b, v5.16b
+; CHECK-NEON-NEXT:    stp d9, d8, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    fmov x9, d1
+; CHECK-NEON-NEXT:    mov x11, v1.d[1]
+; CHECK-NEON-NEXT:    mov x13, v3.d[1]
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    stp d13, d12, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    fmov x12, d5
+; CHECK-NEON-NEXT:    str d14, [sp, #304] // 8-byte Spill
+; CHECK-NEON-NEXT:    mul x4, x8, x9
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    mul x6, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #16 // =0x10
+; CHECK-NEON-NEXT:    dup v1.2d, x10
+; CHECK-NEON-NEXT:    mov x10, v4.d[1]
+; CHECK-NEON-NEXT:    mul x3, x8, x9
+; CHECK-NEON-NEXT:    mov x9, v2.d[1]
+; CHECK-NEON-NEXT:    mul x24, x8, x12
+; CHECK-NEON-NEXT:    mov x12, v5.d[1]
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov d5, x3
+; CHECK-NEON-NEXT:    mov x3, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    mul x28, x9, x11
+; CHECK-NEON-NEXT:    mov w11, #32 // =0x20
+; CHECK-NEON-NEXT:    dup v2.2d, x11
+; CHECK-NEON-NEXT:    mul x11, x9, x13
+; CHECK-NEON-NEXT:    mov w13, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    str x10, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov w10, #64 // =0x40
+; CHECK-NEON-NEXT:    dup v3.2d, x10
+; CHECK-NEON-NEXT:    mul x10, x9, x12
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mov w12, #128 // =0x80
+; CHECK-NEON-NEXT:    str x11, [sp, #312] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x11, v1.d[1]
+; CHECK-NEON-NEXT:    str x10, [sp, #272] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    mul x27, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #256 // =0x100
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #280] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #256] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #512 // =0x200
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #288] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #248] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    ldp d25, d18, [sp, #248] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    mov v18.d[1], x27
+; CHECK-NEON-NEXT:    mov x27, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    str x10, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #2048 // =0x800
+; CHECK-NEON-NEXT:    ldr d19, [sp, #224] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #200] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #4096 // =0x1000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #240] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #192] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #8192 // =0x2000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    ldp d26, d20, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #208] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #176] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #16384 // =0x4000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #168] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #32768 // =0x8000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    ldp d17, d21, [sp, #168] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #184] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #152] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #65536 // =0x10000
+; CHECK-NEON-NEXT:    ldr d22, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x15, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #131072 // =0x20000
+; CHECK-NEON-NEXT:    str x10, [sp, #144] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov d23, x15
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #262144 // =0x40000
+; CHECK-NEON-NEXT:    mul x17, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    str x10, [sp, #160] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #524288 // =0x80000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    str x11, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    str x10, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    mov w12, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #80] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    ldr d9, [sp, #80] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x10, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #2097152 // =0x200000
+; CHECK-NEON-NEXT:    mul x25, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x30, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    fmov d10, x25
+; CHECK-NEON-NEXT:    str x10, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov d30, x30
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    ldr d28, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x10, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    mul x26, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x29, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    str x10, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #16777216 // =0x1000000
+; CHECK-NEON-NEXT:    mul x21, x8, x11
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d8, x21
+; CHECK-NEON-NEXT:    str x10, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    mul x7, x8, x11
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d11, x7
+; CHECK-NEON-NEXT:    str x10, [sp, #72] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    mul x18, x8, x11
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    fmov d31, x18
+; CHECK-NEON-NEXT:    str x10, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    mul x1, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    dup v4.2d, x11
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    str x10, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    mul x14, x8, x11
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x10, [sp, #32] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov x12, v1.d[1]
+; CHECK-NEON-NEXT:    str x10, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    mul x23, x9, x12
+; CHECK-NEON-NEXT:    movi v4.4s, #128, lsl #24
+; CHECK-NEON-NEXT:    mov x12, v3.d[1]
+; CHECK-NEON-NEXT:    mul x19, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    fneg v4.2d, v4.2d
+; CHECK-NEON-NEXT:    mul x22, x9, x12
+; CHECK-NEON-NEXT:    mul x20, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    dup v2.2d, x13
+; CHECK-NEON-NEXT:    fmov x13, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x16, x8, x10
+; CHECK-NEON-NEXT:    mul x0, x8, x13
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    dup v3.2d, x2
+; CHECK-NEON-NEXT:    mov x12, v2.d[1]
+; CHECK-NEON-NEXT:    mov x2, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    dup v6.2d, x2
+; CHECK-NEON-NEXT:    mul x5, x8, x13
+; CHECK-NEON-NEXT:    and v4.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    fmov x13, d2
+; CHECK-NEON-NEXT:    fmov d2, x4
+; CHECK-NEON-NEXT:    fmov d3, x6
+; CHECK-NEON-NEXT:    mov x6, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    mul x10, x9, x12
+; CHECK-NEON-NEXT:    and v7.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x24
+; CHECK-NEON-NEXT:    mov x12, v1.d[1]
+; CHECK-NEON-NEXT:    mov x24, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    mul x4, x8, x13
+; CHECK-NEON-NEXT:    mov x13, v4.d[1]
+; CHECK-NEON-NEXT:    mov v2.d[1], x28
+; CHECK-NEON-NEXT:    str x10, [sp, #40] // 8-byte Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x28, x9, x12
+; CHECK-NEON-NEXT:    fmov x12, d1
+; CHECK-NEON-NEXT:    dup v1.2d, x3
+; CHECK-NEON-NEXT:    fmov x3, d4
+; CHECK-NEON-NEXT:    mov v5.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #296] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x11, x9, x13
+; CHECK-NEON-NEXT:    mov x13, v7.d[1]
+; CHECK-NEON-NEXT:    dup v4.2d, x6
+; CHECK-NEON-NEXT:    mov v3.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x2, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    mov v6.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v4.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    mov x6, v1.d[1]
+; CHECK-NEON-NEXT:    eor v2.16b, v5.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x3, x8, x3
+; CHECK-NEON-NEXT:    mov v25.d[1], x10
+; CHECK-NEON-NEXT:    mul x10, x9, x13
+; CHECK-NEON-NEXT:    fmov x13, d7
+; CHECK-NEON-NEXT:    dup v7.2d, x24
+; CHECK-NEON-NEXT:    mov x24, v4.d[1]
+; CHECK-NEON-NEXT:    fmov x15, d4
+; CHECK-NEON-NEXT:    fmov d4, x17
+; CHECK-NEON-NEXT:    eor v5.16b, v3.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d3, x16
+; CHECK-NEON-NEXT:    mov x16, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    and v16.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    ldr d7, [sp, #120] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v6.16b, v18.16b, v25.16b
+; CHECK-NEON-NEXT:    dup v25.2d, x16
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    stp x11, x10, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x11, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    fmov x17, d16
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    mov v3.d[1], x20
+; CHECK-NEON-NEXT:    mov v19.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x24, x9, x24
+; CHECK-NEON-NEXT:    mov v20.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #232] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x17, x8, x17
+; CHECK-NEON-NEXT:    mov v26.d[1], x10
+; CHECK-NEON-NEXT:    eor v6.16b, v6.16b, v19.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x6
+; CHECK-NEON-NEXT:    fmov x6, d1
+; CHECK-NEON-NEXT:    dup v1.2d, x27
+; CHECK-NEON-NEXT:    mov x27, v16.d[1]
+; CHECK-NEON-NEXT:    dup v16.2d, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v24.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mov v4.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #128] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v29.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    dup v16.2d, x12
+; CHECK-NEON-NEXT:    mov x12, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    str x10, [sp, #288] // 8-byte Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #240] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v19.16b, v20.16b, v26.16b
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v25.16b
+; CHECK-NEON-NEXT:    mul x6, x8, x6
+; CHECK-NEON-NEXT:    mov v21.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #208] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v12.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov d16, x29
+; CHECK-NEON-NEXT:    mul x27, x9, x27
+; CHECK-NEON-NEXT:    mov v17.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    dup v1.2d, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #216] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v22.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #184] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v27.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    ldr d1, [sp, #104] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v23.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v1.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d24
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, v24.d[1]
+; CHECK-NEON-NEXT:    fmov d24, x26
+; CHECK-NEON-NEXT:    mul x30, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v22.16b, v22.16b, v23.16b
+; CHECK-NEON-NEXT:    mul x25, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v27.d[1]
+; CHECK-NEON-NEXT:    mov v9.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #112] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v7.16b, v22.16b, v7.16b
+; CHECK-NEON-NEXT:    mov v10.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #88] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    eor v4.16b, v7.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    mov v30.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d27
+; CHECK-NEON-NEXT:    dup v27.2d, x12
+; CHECK-NEON-NEXT:    mov x12, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    eor v23.16b, v9.16b, v10.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v4.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x26, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v13.16b, v0.16b, v27.16b
+; CHECK-NEON-NEXT:    str x10, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v29.d[1]
+; CHECK-NEON-NEXT:    dup v27.2d, x12
+; CHECK-NEON-NEXT:    mov v28.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #64] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x12, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    mov v24.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d29
+; CHECK-NEON-NEXT:    and v14.16b, v0.16b, v27.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov d27, x14
+; CHECK-NEON-NEXT:    mov x14, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    fmov d29, x1
+; CHECK-NEON-NEXT:    mul x7, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v27.d[1], x19
+; CHECK-NEON-NEXT:    mov v16.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #56] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x10, [sp, #256] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v12.d[1]
+; CHECK-NEON-NEXT:    mov v8.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v11.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d12
+; CHECK-NEON-NEXT:    dup v12.2d, x12
+; CHECK-NEON-NEXT:    mul x18, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v13.d[1]
+; CHECK-NEON-NEXT:    and v12.16b, v0.16b, v12.16b
+; CHECK-NEON-NEXT:    mul x29, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x12, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d13
+; CHECK-NEON-NEXT:    dup v13.2d, x14
+; CHECK-NEON-NEXT:    mov v31.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x14, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v12.d[1]
+; CHECK-NEON-NEXT:    and v18.16b, v0.16b, v13.16b
+; CHECK-NEON-NEXT:    eor v13.16b, v2.16b, v5.16b
+; CHECK-NEON-NEXT:    fmov d2, x0
+; CHECK-NEON-NEXT:    mov x0, #70368744177664 // =0x400000000000
+; CHECK-NEON-NEXT:    dup v25.2d, x0
+; CHECK-NEON-NEXT:    fmov d5, x5
+; CHECK-NEON-NEXT:    mov x5, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    mov x16, v18.d[1]
+; CHECK-NEON-NEXT:    mov v29.d[1], x11
+; CHECK-NEON-NEXT:    mov x11, v14.d[1]
+; CHECK-NEON-NEXT:    mul x1, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d12
+; CHECK-NEON-NEXT:    eor v26.16b, v13.16b, v6.16b
+; CHECK-NEON-NEXT:    eor v6.16b, v19.16b, v21.16b
+; CHECK-NEON-NEXT:    dup v19.2d, x5
+; CHECK-NEON-NEXT:    mov v5.d[1], x22
+; CHECK-NEON-NEXT:    mov v2.d[1], x23
+; CHECK-NEON-NEXT:    ldp d13, d12, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x0, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v20.d[1]
+; CHECK-NEON-NEXT:    eor v21.16b, v6.16b, v17.16b
+; CHECK-NEON-NEXT:    fmov d17, x4
+; CHECK-NEON-NEXT:    fmov d6, x2
+; CHECK-NEON-NEXT:    mul x16, x9, x16
+; CHECK-NEON-NEXT:    mov x2, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    mov x4, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    dup v22.2d, x4
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov v6.d[1], x28
+; CHECK-NEON-NEXT:    mul x21, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d14
+; CHECK-NEON-NEXT:    ldr d14, [sp, #304] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x16, [sp, #312] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x16, d18
+; CHECK-NEON-NEXT:    and v18.16b, v0.16b, v25.16b
+; CHECK-NEON-NEXT:    dup v25.2d, x2
+; CHECK-NEON-NEXT:    str x10, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x10, d20
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v19.16b
+; CHECK-NEON-NEXT:    mul x19, x8, x16
+; CHECK-NEON-NEXT:    mov x16, v18.d[1]
+; CHECK-NEON-NEXT:    eor v19.16b, v26.16b, v21.16b
+; CHECK-NEON-NEXT:    eor v21.16b, v23.16b, v30.16b
+; CHECK-NEON-NEXT:    and v23.16b, v0.16b, v25.16b
+; CHECK-NEON-NEXT:    eor v25.16b, v8.16b, v11.16b
+; CHECK-NEON-NEXT:    mul x5, x8, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT:    dup v26.2d, x13
+; CHECK-NEON-NEXT:    eor v1.16b, v19.16b, v1.16b
+; CHECK-NEON-NEXT:    fmov d19, x6
+; CHECK-NEON-NEXT:    mov v17.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, v20.d[1]
+; CHECK-NEON-NEXT:    mul x2, x9, x16
+; CHECK-NEON-NEXT:    fmov x16, d18
+; CHECK-NEON-NEXT:    fmov d18, x3
+; CHECK-NEON-NEXT:    eor v21.16b, v21.16b, v28.16b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    ldp d9, d8, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    ldp d11, d10, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x3, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d20
+; CHECK-NEON-NEXT:    eor v4.16b, v21.16b, v24.16b
+; CHECK-NEON-NEXT:    eor v21.16b, v25.16b, v31.16b
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v22.16b
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v17.16b
+; CHECK-NEON-NEXT:    mul x20, x8, x16
+; CHECK-NEON-NEXT:    mov x16, v23.d[1]
+; CHECK-NEON-NEXT:    fmov d17, x30
+; CHECK-NEON-NEXT:    eor v4.16b, v4.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov d16, x15
+; CHECK-NEON-NEXT:    mov x15, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v21.16b, v21.16b, v29.16b
+; CHECK-NEON-NEXT:    dup v24.2d, x15
+; CHECK-NEON-NEXT:    mov x4, v20.d[1]
+; CHECK-NEON-NEXT:    fmov x15, d20
+; CHECK-NEON-NEXT:    mov v18.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x23, x9, x16
+; CHECK-NEON-NEXT:    eor v21.16b, v21.16b, v27.16b
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v6.16b
+; CHECK-NEON-NEXT:    mov v17.d[1], x25
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT:    mov x25, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    dup v22.2d, x10
+; CHECK-NEON-NEXT:    fmov x10, d23
+; CHECK-NEON-NEXT:    and v23.16b, v0.16b, v26.16b
+; CHECK-NEON-NEXT:    eor v3.16b, v21.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x6, x9, x4
+; CHECK-NEON-NEXT:    ldr x4, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    mov v16.d[1], x24
+; CHECK-NEON-NEXT:    fmov d4, x17
+; CHECK-NEON-NEXT:    mov x16, v23.d[1]
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v22.16b
+; CHECK-NEON-NEXT:    and v22.16b, v0.16b, v24.16b
+; CHECK-NEON-NEXT:    mov v19.d[1], x4
+; CHECK-NEON-NEXT:    fmov x4, d23
+; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v2.16b
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v18.16b
+; CHECK-NEON-NEXT:    fmov d23, x26
+; CHECK-NEON-NEXT:    fmov d18, x7
+; CHECK-NEON-NEXT:    mov x24, v20.d[1]
+; CHECK-NEON-NEXT:    fmov d6, x29
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mul x22, x9, x16
+; CHECK-NEON-NEXT:    mov x16, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    mov x14, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    dup v21.2d, x16
+; CHECK-NEON-NEXT:    mov x16, v22.d[1]
+; CHECK-NEON-NEXT:    eor v3.16b, v3.16b, v7.16b
+; CHECK-NEON-NEXT:    mul x17, x8, x4
+; CHECK-NEON-NEXT:    fmov x4, d20
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    dup v2.2d, x14
+; CHECK-NEON-NEXT:    mov v6.d[1], x18
+; CHECK-NEON-NEXT:    mov v5.d[1], x12
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v21.16b
+; CHECK-NEON-NEXT:    dup v21.2d, x25
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mov v4.d[1], x27
+; CHECK-NEON-NEXT:    ldp x29, x30, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x7, x9, x16
+; CHECK-NEON-NEXT:    mov x16, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    dup v7.2d, x16
+; CHECK-NEON-NEXT:    ldr x16, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v21.16b, v0.16b, v21.16b
+; CHECK-NEON-NEXT:    mov x26, v20.d[1]
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    mov v23.d[1], x16
+; CHECK-NEON-NEXT:    ldr x16, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x18, v21.d[1]
+; CHECK-NEON-NEXT:    mul x27, x8, x4
+; CHECK-NEON-NEXT:    fmov x4, d22
+; CHECK-NEON-NEXT:    mov v18.d[1], x16
+; CHECK-NEON-NEXT:    fmov x16, d20
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    eor v7.16b, v3.16b, v19.16b
+; CHECK-NEON-NEXT:    and v19.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    fmov d2, x0
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mul x24, x9, x24
+; CHECK-NEON-NEXT:    mov x12, v20.d[1]
+; CHECK-NEON-NEXT:    fmov x0, d20
+; CHECK-NEON-NEXT:    fmov d20, x13
+; CHECK-NEON-NEXT:    mul x14, x8, x16
+; CHECK-NEON-NEXT:    fmov x16, d21
+; CHECK-NEON-NEXT:    eor v7.16b, v7.16b, v16.16b
+; CHECK-NEON-NEXT:    eor v16.16b, v17.16b, v23.16b
+; CHECK-NEON-NEXT:    fmov d17, x20
+; CHECK-NEON-NEXT:    mov x20, v19.d[1]
+; CHECK-NEON-NEXT:    mul x11, x9, x18
+; CHECK-NEON-NEXT:    mov x18, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    mov v20.d[1], x3
+; CHECK-NEON-NEXT:    dup v21.2d, x18
+; CHECK-NEON-NEXT:    mov v3.d[1], x21
+; CHECK-NEON-NEXT:    mov v2.d[1], x1
+; CHECK-NEON-NEXT:    mul x18, x8, x16
+; CHECK-NEON-NEXT:    mov v17.d[1], x2
+; CHECK-NEON-NEXT:    eor v16.16b, v16.16b, v18.16b
+; CHECK-NEON-NEXT:    mov x2, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    mov x3, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    eor v4.16b, v7.16b, v4.16b
+; CHECK-NEON-NEXT:    mul x16, x9, x12
+; CHECK-NEON-NEXT:    mov x12, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    and v21.16b, v0.16b, v21.16b
+; CHECK-NEON-NEXT:    dup v18.2d, x12
+; CHECK-NEON-NEXT:    fmov x12, d19
+; CHECK-NEON-NEXT:    fmov d19, x10
+; CHECK-NEON-NEXT:    eor v6.16b, v16.16b, v6.16b
+; CHECK-NEON-NEXT:    dup v16.2d, x2
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v20.16b
+; CHECK-NEON-NEXT:    fmov d20, x15
+; CHECK-NEON-NEXT:    mul x13, x8, x0
+; CHECK-NEON-NEXT:    mov x10, v21.d[1]
+; CHECK-NEON-NEXT:    mov v19.d[1], x23
+; CHECK-NEON-NEXT:    and v18.16b, v0.16b, v18.16b
+; CHECK-NEON-NEXT:    fmov x15, d21
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v16.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov d21, x17
+; CHECK-NEON-NEXT:    mov v20.d[1], x6
+; CHECK-NEON-NEXT:    mov x17, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v5.16b
+; CHECK-NEON-NEXT:    mul x0, x9, x20
+; CHECK-NEON-NEXT:    mov x1, v18.d[1]
+; CHECK-NEON-NEXT:    dup v7.2d, x3
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v19.16b
+; CHECK-NEON-NEXT:    mov x2, v16.d[1]
+; CHECK-NEON-NEXT:    dup v19.2d, x17
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    mov v21.d[1], x22
+; CHECK-NEON-NEXT:    fmov x17, d18
+; CHECK-NEON-NEXT:    fmov d18, x27
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v3.16b
+; CHECK-NEON-NEXT:    and v7.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    eor v6.16b, v17.16b, v20.16b
+; CHECK-NEON-NEXT:    fmov d17, x12
+; CHECK-NEON-NEXT:    mul x25, x8, x4
+; CHECK-NEON-NEXT:    and v19.16b, v0.16b, v19.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov v18.d[1], x24
+; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v2.16b
+; CHECK-NEON-NEXT:    mov v17.d[1], x0
+; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v21.16b
+; CHECK-NEON-NEXT:    movi v6.2d, #0000000000000000
+; CHECK-NEON-NEXT:    mul x0, x9, x2
+; CHECK-NEON-NEXT:    fmov x2, d16
+; CHECK-NEON-NEXT:    fmov v16.2d, #2.00000000
+; CHECK-NEON-NEXT:    fmov d20, x15
+; CHECK-NEON-NEXT:    mov x12, v19.d[1]
+; CHECK-NEON-NEXT:    fmov d21, x25
+; CHECK-NEON-NEXT:    mul x17, x8, x17
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v18.16b
+; CHECK-NEON-NEXT:    fneg v6.2d, v6.2d
+; CHECK-NEON-NEXT:    ldp x22, x21, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x15, x8, x2
+; CHECK-NEON-NEXT:    fmov x2, d19
+; CHECK-NEON-NEXT:    and v16.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    mov v20.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, v7.d[1]
+; CHECK-NEON-NEXT:    mov v21.d[1], x7
+; CHECK-NEON-NEXT:    mul x1, x9, x1
+; CHECK-NEON-NEXT:    ldp x24, x23, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    fmov d19, x17
+; CHECK-NEON-NEXT:    and v0.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    mul x17, x8, x2
+; CHECK-NEON-NEXT:    ldr x14, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT:    fmov d18, x15
+; CHECK-NEON-NEXT:    mov x15, v16.d[1]
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v20.16b
+; CHECK-NEON-NEXT:    mul x4, x9, x26
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v21.16b
+; CHECK-NEON-NEXT:    mov v19.d[1], x1
+; CHECK-NEON-NEXT:    fmov x1, d7
+; CHECK-NEON-NEXT:    fmov d7, x19
+; CHECK-NEON-NEXT:    mul x12, x9, x12
+; CHECK-NEON-NEXT:    mov v18.d[1], x0
+; CHECK-NEON-NEXT:    ldp x20, x19, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x0, x8, x1
+; CHECK-NEON-NEXT:    mov v7.d[1], x14
+; CHECK-NEON-NEXT:    eor v5.16b, v17.16b, v19.16b
+; CHECK-NEON-NEXT:    fmov d17, x17
+; CHECK-NEON-NEXT:    fmov x17, d0
+; CHECK-NEON-NEXT:    mul x14, x9, x15
+; CHECK-NEON-NEXT:    fmov x15, d16
+; CHECK-NEON-NEXT:    mov v6.d[1], x4
+; CHECK-NEON-NEXT:    fmov d16, x5
+; CHECK-NEON-NEXT:    ldp x26, x25, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov v17.d[1], x12
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v18.16b
+; CHECK-NEON-NEXT:    mov x12, v0.d[1]
+; CHECK-NEON-NEXT:    fmov d0, x18
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v7.16b
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    fmov d7, x0
+; CHECK-NEON-NEXT:    eor v3.16b, v3.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    ldp x28, x27, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x8, x8, x17
+; CHECK-NEON-NEXT:    ldr x17, [sp, #296] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v0.d[1], x11
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v17.16b
+; CHECK-NEON-NEXT:    mov v16.d[1], x17
+; CHECK-NEON-NEXT:    mul x9, x9, x12
+; CHECK-NEON-NEXT:    mov v6.d[1], x16
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    eor v0.16b, v3.16b, v0.16b
+; CHECK-NEON-NEXT:    fmov d4, x8
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v7.16b
+; CHECK-NEON-NEXT:    mov v17.d[1], x14
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v16.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    mov v4.d[1], x9
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v17.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v2.16b, v4.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-NEON-NEXT:    rbit v0.16b, v0.16b
+; CHECK-NEON-NEXT:    add sp, sp, #464
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-AES-LABEL: clmulr_v2i64_neon:
+; CHECK-AES:       // %bb.0:
+; CHECK-AES-NEXT:    rev64 v1.16b, v1.16b
+; CHECK-AES-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-AES-NEXT:    rbit v1.16b, v1.16b
+; CHECK-AES-NEXT:    rbit v0.16b, v0.16b
+; CHECK-AES-NEXT:    pmull2 v2.1q, v0.2d, v1.2d
+; CHECK-AES-NEXT:    pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-AES-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-AES-NEXT:    rbit v0.16b, v0.16b
+; CHECK-AES-NEXT:    ret
+  %a.ext = zext <2 x i64> %a to <2 x i128>
+  %b.ext = zext <2 x i64> %b to <2 x i128>
+  %clmul = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a.ext, <2 x i128> %b.ext)
+  %res.ext = lshr <2 x i128> %clmul, splat (i128 63)
+  %res = trunc <2 x i128> %res.ext to <2 x i64>
+  ret <2 x i64> %res
+}
 
-; TODO
-;define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
-;  %a.ext = zext <1 x i64> %a to <1 x i128>
-;  %b.ext = zext <1 x i64> %b to <1 x i128>
-;  %clmul = call <1 x i128> @llvm.clmul.v2i128(<1 x i128> %a.ext, <1 x i128> %b.ext)
-;  %res.ext = lshr <1 x i128> %clmul, splat (i128 63)
-;  %res = trunc <1 x i128> %res.ext to <1 x i64>
-;  ret <1 x i64> %res
-;}
+define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
+; CHECK-NEON-LABEL: clmulr_v1i64_neon:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    rev64 v1.8b, v1.8b
+; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEON-NEXT:    rev64 v2.8b, v0.8b
+; CHECK-NEON-NEXT:    mov w10, #8 // =0x8
+; CHECK-NEON-NEXT:    mov w11, #16 // =0x10
+; CHECK-NEON-NEXT:    mov w12, #32 // =0x20
+; CHECK-NEON-NEXT:    mov w13, #64 // =0x40
+; CHECK-NEON-NEXT:    mov w14, #128 // =0x80
+; CHECK-NEON-NEXT:    mov w15, #256 // =0x100
+; CHECK-NEON-NEXT:    rbit v0.8b, v1.8b
+; CHECK-NEON-NEXT:    fmov d1, x8
+; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEON-NEXT:    fmov d3, x8
+; CHECK-NEON-NEXT:    rbit v2.8b, v2.8b
+; CHECK-NEON-NEXT:    mov w8, #4 // =0x4
+; CHECK-NEON-NEXT:    fmov d4, x8
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v1.8b
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x8, d2
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    fmov x9, d1
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d4, x14
+; CHECK-NEON-NEXT:    mov w14, #512 // =0x200
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    fmov x12, d2
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    fmov x13, d1
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    fmov d3, x15
+; CHECK-NEON-NEXT:    fmov x14, d2
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mov w15, #1024 // =0x400
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov x9, d1
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    mov w11, #2048 // =0x800
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    fmov x12, d1
+; CHECK-NEON-NEXT:    fmov d1, x13
+; CHECK-NEON-NEXT:    mov w13, #4096 // =0x1000
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d4, x14
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mov w11, #8192 // =0x2000
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    mul x13, x8, x14
+; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov w11, #16384 // =0x4000
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    eor v1.8b, v2.8b, v1.8b
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov w9, #32768 // =0x8000
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mov w10, #65536 // =0x10000
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    mov w10, #131072 // =0x20000
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #262144 // =0x40000
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    mov w11, #524288 // =0x80000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov w11, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #2097152 // =0x200000
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    mov w10, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov x9, d6
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x9
+; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    mul x9, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #16777216 // =0x1000000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    fmov x15, d5
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x11, d7
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mul x12, x8, x15
+; CHECK-NEON-NEXT:    mov w15, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x15
+; CHECK-NEON-NEXT:    mov w15, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    movi v4.2s, #128, lsl #24
+; CHECK-NEON-NEXT:    mov w15, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x13, x8, x14
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x15
+; CHECK-NEON-NEXT:    mov w15, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    fmov x14, d6
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fneg d4, d4
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    fmov x15, d6
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x15
+; CHECK-NEON-NEXT:    eor v2.8b, v7.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x15, d16
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x10, x8, x15
+; CHECK-NEON-NEXT:    mov x15, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    mov x15, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov x13, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d17, x13
+; CHECK-NEON-NEXT:    fmov d16, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    mov x13, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    mov x14, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mov x9, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    eor v6.8b, v7.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mov x11, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mov x10, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov x13, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mov x14, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d6
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mov x11, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mul x12, x8, x10
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x9
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x9, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mov x11, #70368744177664 // =0x400000000000
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d16, x9
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x14
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    mov x15, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov d7, x15
+; CHECK-NEON-NEXT:    fmov x15, d6
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x15
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    fmov x11, d6
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    mov x13, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x15, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d7
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    fmov d17, x13
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    mul x14, x8, x10
+; CHECK-NEON-NEXT:    eor v7.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    fmov x12, d16
+; CHECK-NEON-NEXT:    fmov d16, x11
+; CHECK-NEON-NEXT:    fmov d18, x15
+; CHECK-NEON-NEXT:    mov x15, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    fmov x13, d17
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d17, x12
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    fmov x14, d16
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x11
+; CHECK-NEON-NEXT:    mul x13, x8, x14
+; CHECK-NEON-NEXT:    mov x14, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    fmov x11, d17
+; CHECK-NEON-NEXT:    fmov d17, x14
+; CHECK-NEON-NEXT:    mov x14, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov x14, d16
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x12
+; CHECK-NEON-NEXT:    fmov x12, d17
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    mul x15, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov x12, d16
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    fmov x13, d17
+; CHECK-NEON-NEXT:    fmov d16, x15
+; CHECK-NEON-NEXT:    mov x15, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    fmov x14, d3
+; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    mov x15, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d7, x15
+; CHECK-NEON-NEXT:    mov x15, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    movi d16, #0000000000000000
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d18, x13
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fneg d16, d16
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mov x14, #4611686018427387904 // =0x4000000000000000
+; CHECK-NEON-NEXT:    fmov x15, d7
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v18.8b
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    fmov x14, d17
+; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    and v0.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v17.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x14
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov x11, d7
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    fmov d0, x15
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mul x8, x8, x11
+; CHECK-NEON-NEXT:    eor v0.8b, v5.8b, v0.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d3, x8
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-NEON-NEXT:    rbit v0.8b, v0.8b
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-AES-LABEL: clmulr_v1i64_neon:
+; CHECK-AES:       // %bb.0:
+; CHECK-AES-NEXT:    rev64 v1.8b, v1.8b
+; CHECK-AES-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-AES-NEXT:    rbit v1.8b, v1.8b
+; CHECK-AES-NEXT:    rbit v0.8b, v0.8b
+; CHECK-AES-NEXT:    pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-AES-NEXT:    rbit v0.8b, v0.8b
+; CHECK-AES-NEXT:    ret
+  %a.ext = zext <1 x i64> %a to <1 x i128>
+  %b.ext = zext <1 x i64> %b to <1 x i128>
+  %clmul = call <1 x i128> @llvm.clmul.v2i128(<1 x i128> %a.ext, <1 x i128> %b.ext)
+  %res.ext = lshr <1 x i128> %clmul, splat (i128 63)
+  %res = trunc <1 x i128> %res.ext to <1 x i64>
+  ret <1 x i64> %res
+}
 
 define <16 x i8> @clmulh_v16i8_neon(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; CHECK-LABEL: clmulh_v16i8_neon:
@@ -5716,22 +8151,1248 @@ define <2 x i32> @clmulh_v2i32_neon(<2 x i32> %a, <2 x i32> %b) nounwind {
   ret <2 x i32> %res
 }
 
-; TODO
-;define <2 x i64> @clmulh_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
-;  %a.ext = zext <2 x i64> %a to <2 x i128>
-;  %b.ext = zext <2 x i64> %b to <2 x i128>
-;  %clmul = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a.ext, <2 x i128> %b.ext)
-;  %res.ext = lshr <2 x i128> %clmul, splat (i128 64)
-;  %res = trunc <2 x i128> %res.ext to <2 x i64>
-;  ret <2 x i64> %res
-;}
+define <2 x i64> @clmulh_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
+; CHECK-NEON-LABEL: clmulh_v2i64_neon:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    sub sp, sp, #464
+; CHECK-NEON-NEXT:    rev64 v1.16b, v1.16b
+; CHECK-NEON-NEXT:    rev64 v2.16b, v0.16b
+; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEON-NEXT:    stp x24, x23, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    mov x2, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    stp x28, x27, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    stp x26, x25, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    rbit v0.16b, v1.16b
+; CHECK-NEON-NEXT:    dup v1.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEON-NEXT:    dup v3.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #4 // =0x4
+; CHECK-NEON-NEXT:    rbit v2.16b, v2.16b
+; CHECK-NEON-NEXT:    dup v4.2d, x8
+; CHECK-NEON-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEON-NEXT:    stp x29, x30, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    dup v5.2d, x8
+; CHECK-NEON-NEXT:    stp x22, x21, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    stp x20, x19, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    and v4.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov x8, d2
+; CHECK-NEON-NEXT:    stp d11, d10, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    and v5.16b, v0.16b, v5.16b
+; CHECK-NEON-NEXT:    stp d9, d8, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    fmov x9, d1
+; CHECK-NEON-NEXT:    mov x11, v1.d[1]
+; CHECK-NEON-NEXT:    mov x13, v3.d[1]
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    stp d13, d12, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    fmov x12, d5
+; CHECK-NEON-NEXT:    str d14, [sp, #304] // 8-byte Spill
+; CHECK-NEON-NEXT:    mul x4, x8, x9
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    mul x6, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #16 // =0x10
+; CHECK-NEON-NEXT:    dup v1.2d, x10
+; CHECK-NEON-NEXT:    mov x10, v4.d[1]
+; CHECK-NEON-NEXT:    mul x3, x8, x9
+; CHECK-NEON-NEXT:    mov x9, v2.d[1]
+; CHECK-NEON-NEXT:    mul x24, x8, x12
+; CHECK-NEON-NEXT:    mov x12, v5.d[1]
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov d5, x3
+; CHECK-NEON-NEXT:    mov x3, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    mul x28, x9, x11
+; CHECK-NEON-NEXT:    mov w11, #32 // =0x20
+; CHECK-NEON-NEXT:    dup v2.2d, x11
+; CHECK-NEON-NEXT:    mul x11, x9, x13
+; CHECK-NEON-NEXT:    mov w13, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    str x10, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov w10, #64 // =0x40
+; CHECK-NEON-NEXT:    dup v3.2d, x10
+; CHECK-NEON-NEXT:    mul x10, x9, x12
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mov w12, #128 // =0x80
+; CHECK-NEON-NEXT:    str x11, [sp, #312] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x11, v1.d[1]
+; CHECK-NEON-NEXT:    str x10, [sp, #272] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    mul x27, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #256 // =0x100
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #280] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #256] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #512 // =0x200
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #288] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #248] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    ldp d25, d18, [sp, #248] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    mov v18.d[1], x27
+; CHECK-NEON-NEXT:    mov x27, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    str x10, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #224] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #2048 // =0x800
+; CHECK-NEON-NEXT:    ldr d19, [sp, #224] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #232] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #200] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #4096 // =0x1000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #240] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #192] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #8192 // =0x2000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    ldp d26, d20, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #208] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #176] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #16384 // =0x4000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #216] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #168] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #32768 // =0x8000
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    ldp d17, d21, [sp, #168] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    str x10, [sp, #184] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    str x11, [sp, #152] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #65536 // =0x10000
+; CHECK-NEON-NEXT:    ldr d22, [sp, #152] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x15, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #131072 // =0x20000
+; CHECK-NEON-NEXT:    str x10, [sp, #144] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov d23, x15
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #120] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #262144 // =0x40000
+; CHECK-NEON-NEXT:    mul x17, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    str x10, [sp, #160] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #524288 // =0x80000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    str x11, [sp, #104] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    str x10, [sp, #128] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    mov w12, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #80] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    ldr d9, [sp, #80] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x10, [sp, #136] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #2097152 // =0x200000
+; CHECK-NEON-NEXT:    mul x25, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x30, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    fmov d10, x25
+; CHECK-NEON-NEXT:    str x10, [sp, #112] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov d30, x30
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x11, [sp, #48] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    ldr d28, [sp, #48] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x10, [sp, #88] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    mul x26, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mul x29, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    str x10, [sp, #96] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #16777216 // =0x1000000
+; CHECK-NEON-NEXT:    mul x21, x8, x11
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d8, x21
+; CHECK-NEON-NEXT:    str x10, [sp, #64] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    mul x7, x8, x11
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d11, x7
+; CHECK-NEON-NEXT:    str x10, [sp, #72] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    mul x18, x8, x11
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    fmov d31, x18
+; CHECK-NEON-NEXT:    str x10, [sp, #56] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    dup v3.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    mul x1, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    dup v4.2d, x11
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    str x10, [sp, #24] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    dup v2.2d, x12
+; CHECK-NEON-NEXT:    mov w12, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    mul x14, x8, x11
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    str x10, [sp, #32] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v1.d[1]
+; CHECK-NEON-NEXT:    dup v1.2d, x12
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov x12, v1.d[1]
+; CHECK-NEON-NEXT:    str x10, [sp, #16] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v3.d[1]
+; CHECK-NEON-NEXT:    and v3.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    mul x23, x9, x12
+; CHECK-NEON-NEXT:    movi v4.4s, #128, lsl #24
+; CHECK-NEON-NEXT:    mov x12, v3.d[1]
+; CHECK-NEON-NEXT:    mul x19, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v2.d[1]
+; CHECK-NEON-NEXT:    fneg v4.2d, v4.2d
+; CHECK-NEON-NEXT:    mul x22, x9, x12
+; CHECK-NEON-NEXT:    mul x20, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    dup v2.2d, x13
+; CHECK-NEON-NEXT:    fmov x13, d1
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    and v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x16, x8, x10
+; CHECK-NEON-NEXT:    mul x0, x8, x13
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    dup v3.2d, x2
+; CHECK-NEON-NEXT:    mov x12, v2.d[1]
+; CHECK-NEON-NEXT:    mov x2, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    dup v6.2d, x2
+; CHECK-NEON-NEXT:    mul x5, x8, x13
+; CHECK-NEON-NEXT:    and v4.16b, v0.16b, v3.16b
+; CHECK-NEON-NEXT:    fmov x13, d2
+; CHECK-NEON-NEXT:    fmov d2, x4
+; CHECK-NEON-NEXT:    fmov d3, x6
+; CHECK-NEON-NEXT:    mov x6, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    mul x10, x9, x12
+; CHECK-NEON-NEXT:    and v7.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x24
+; CHECK-NEON-NEXT:    mov x12, v1.d[1]
+; CHECK-NEON-NEXT:    mov x24, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    mul x4, x8, x13
+; CHECK-NEON-NEXT:    mov x13, v4.d[1]
+; CHECK-NEON-NEXT:    mov v2.d[1], x28
+; CHECK-NEON-NEXT:    str x10, [sp, #40] // 8-byte Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x28, x9, x12
+; CHECK-NEON-NEXT:    fmov x12, d1
+; CHECK-NEON-NEXT:    dup v1.2d, x3
+; CHECK-NEON-NEXT:    fmov x3, d4
+; CHECK-NEON-NEXT:    mov v5.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #296] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x11, x9, x13
+; CHECK-NEON-NEXT:    mov x13, v7.d[1]
+; CHECK-NEON-NEXT:    dup v4.2d, x6
+; CHECK-NEON-NEXT:    mov v3.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v1.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x2, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    mov v6.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v4.16b, v0.16b, v4.16b
+; CHECK-NEON-NEXT:    mov x6, v1.d[1]
+; CHECK-NEON-NEXT:    eor v2.16b, v5.16b, v2.16b
+; CHECK-NEON-NEXT:    mul x3, x8, x3
+; CHECK-NEON-NEXT:    mov v25.d[1], x10
+; CHECK-NEON-NEXT:    mul x10, x9, x13
+; CHECK-NEON-NEXT:    fmov x13, d7
+; CHECK-NEON-NEXT:    dup v7.2d, x24
+; CHECK-NEON-NEXT:    mov x24, v4.d[1]
+; CHECK-NEON-NEXT:    fmov x15, d4
+; CHECK-NEON-NEXT:    fmov d4, x17
+; CHECK-NEON-NEXT:    eor v5.16b, v3.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d3, x16
+; CHECK-NEON-NEXT:    mov x16, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    and v16.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    ldr d7, [sp, #120] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v6.16b, v18.16b, v25.16b
+; CHECK-NEON-NEXT:    dup v25.2d, x16
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    stp x11, x10, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x11, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    fmov x17, d16
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    mov v3.d[1], x20
+; CHECK-NEON-NEXT:    mov v19.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x24, x9, x24
+; CHECK-NEON-NEXT:    mov v20.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #232] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x17, x8, x17
+; CHECK-NEON-NEXT:    mov v26.d[1], x10
+; CHECK-NEON-NEXT:    eor v6.16b, v6.16b, v19.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x6
+; CHECK-NEON-NEXT:    fmov x6, d1
+; CHECK-NEON-NEXT:    dup v1.2d, x27
+; CHECK-NEON-NEXT:    mov x27, v16.d[1]
+; CHECK-NEON-NEXT:    dup v16.2d, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #160] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v24.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    mov v4.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #128] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v29.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    dup v16.2d, x12
+; CHECK-NEON-NEXT:    mov x12, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    str x10, [sp, #288] // 8-byte Spill
+; CHECK-NEON-NEXT:    ldr x10, [sp, #240] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v19.16b, v20.16b, v26.16b
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v25.16b
+; CHECK-NEON-NEXT:    mul x6, x8, x6
+; CHECK-NEON-NEXT:    mov v21.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #208] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v12.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov d16, x29
+; CHECK-NEON-NEXT:    mul x27, x9, x27
+; CHECK-NEON-NEXT:    mov v17.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    dup v1.2d, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #216] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v22.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #184] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v27.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    ldr d1, [sp, #104] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v23.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #144] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v1.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d24
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, v24.d[1]
+; CHECK-NEON-NEXT:    fmov d24, x26
+; CHECK-NEON-NEXT:    mul x30, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #136] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v22.16b, v22.16b, v23.16b
+; CHECK-NEON-NEXT:    mul x25, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v27.d[1]
+; CHECK-NEON-NEXT:    mov v9.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #112] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v7.16b, v22.16b, v7.16b
+; CHECK-NEON-NEXT:    mov v10.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #88] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    eor v4.16b, v7.16b, v4.16b
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    mov v30.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d27
+; CHECK-NEON-NEXT:    dup v27.2d, x12
+; CHECK-NEON-NEXT:    mov x12, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    eor v23.16b, v9.16b, v10.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v4.16b, v1.16b
+; CHECK-NEON-NEXT:    mul x26, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #96] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v13.16b, v0.16b, v27.16b
+; CHECK-NEON-NEXT:    str x10, [sp, #264] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v29.d[1]
+; CHECK-NEON-NEXT:    dup v27.2d, x12
+; CHECK-NEON-NEXT:    mov v28.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #64] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x12, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    mov v24.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d29
+; CHECK-NEON-NEXT:    and v14.16b, v0.16b, v27.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    fmov d27, x14
+; CHECK-NEON-NEXT:    mov x14, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    fmov d29, x1
+; CHECK-NEON-NEXT:    mul x7, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #72] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v27.d[1], x19
+; CHECK-NEON-NEXT:    mov v16.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #56] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x10, [sp, #256] // 8-byte Spill
+; CHECK-NEON-NEXT:    mov x10, v12.d[1]
+; CHECK-NEON-NEXT:    mov v8.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #24] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v11.d[1], x11
+; CHECK-NEON-NEXT:    fmov x11, d12
+; CHECK-NEON-NEXT:    dup v12.2d, x12
+; CHECK-NEON-NEXT:    mul x18, x9, x10
+; CHECK-NEON-NEXT:    mov x10, v13.d[1]
+; CHECK-NEON-NEXT:    and v12.16b, v0.16b, v12.16b
+; CHECK-NEON-NEXT:    mul x29, x8, x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #32] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x12, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d13
+; CHECK-NEON-NEXT:    dup v13.2d, x14
+; CHECK-NEON-NEXT:    mov v31.d[1], x11
+; CHECK-NEON-NEXT:    ldr x11, [sp, #16] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x14, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v12.d[1]
+; CHECK-NEON-NEXT:    and v18.16b, v0.16b, v13.16b
+; CHECK-NEON-NEXT:    eor v13.16b, v2.16b, v5.16b
+; CHECK-NEON-NEXT:    fmov d2, x0
+; CHECK-NEON-NEXT:    mov x0, #70368744177664 // =0x400000000000
+; CHECK-NEON-NEXT:    dup v25.2d, x0
+; CHECK-NEON-NEXT:    fmov d5, x5
+; CHECK-NEON-NEXT:    mov x5, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    mov x16, v18.d[1]
+; CHECK-NEON-NEXT:    mov v29.d[1], x11
+; CHECK-NEON-NEXT:    mov x11, v14.d[1]
+; CHECK-NEON-NEXT:    mul x1, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d12
+; CHECK-NEON-NEXT:    eor v26.16b, v13.16b, v6.16b
+; CHECK-NEON-NEXT:    eor v6.16b, v19.16b, v21.16b
+; CHECK-NEON-NEXT:    dup v19.2d, x5
+; CHECK-NEON-NEXT:    mov v5.d[1], x22
+; CHECK-NEON-NEXT:    mov v2.d[1], x23
+; CHECK-NEON-NEXT:    ldp d13, d12, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x0, x8, x10
+; CHECK-NEON-NEXT:    mov x10, v20.d[1]
+; CHECK-NEON-NEXT:    eor v21.16b, v6.16b, v17.16b
+; CHECK-NEON-NEXT:    fmov d17, x4
+; CHECK-NEON-NEXT:    fmov d6, x2
+; CHECK-NEON-NEXT:    mul x16, x9, x16
+; CHECK-NEON-NEXT:    mov x2, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    mov x4, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    dup v22.2d, x4
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov v6.d[1], x28
+; CHECK-NEON-NEXT:    mul x21, x9, x11
+; CHECK-NEON-NEXT:    fmov x11, d14
+; CHECK-NEON-NEXT:    ldr d14, [sp, #304] // 8-byte Reload
+; CHECK-NEON-NEXT:    str x16, [sp, #312] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x16, d18
+; CHECK-NEON-NEXT:    and v18.16b, v0.16b, v25.16b
+; CHECK-NEON-NEXT:    dup v25.2d, x2
+; CHECK-NEON-NEXT:    str x10, [sp, #296] // 8-byte Spill
+; CHECK-NEON-NEXT:    fmov x10, d20
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v19.16b
+; CHECK-NEON-NEXT:    mul x19, x8, x16
+; CHECK-NEON-NEXT:    mov x16, v18.d[1]
+; CHECK-NEON-NEXT:    eor v19.16b, v26.16b, v21.16b
+; CHECK-NEON-NEXT:    eor v21.16b, v23.16b, v30.16b
+; CHECK-NEON-NEXT:    and v23.16b, v0.16b, v25.16b
+; CHECK-NEON-NEXT:    eor v25.16b, v8.16b, v11.16b
+; CHECK-NEON-NEXT:    mul x5, x8, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #40] // 8-byte Reload
+; CHECK-NEON-NEXT:    dup v26.2d, x13
+; CHECK-NEON-NEXT:    eor v1.16b, v19.16b, v1.16b
+; CHECK-NEON-NEXT:    fmov d19, x6
+; CHECK-NEON-NEXT:    mov v17.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, v20.d[1]
+; CHECK-NEON-NEXT:    mul x2, x9, x16
+; CHECK-NEON-NEXT:    fmov x16, d18
+; CHECK-NEON-NEXT:    fmov d18, x3
+; CHECK-NEON-NEXT:    eor v21.16b, v21.16b, v28.16b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    ldp d9, d8, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    ldp d11, d10, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x3, x9, x10
+; CHECK-NEON-NEXT:    fmov x10, d20
+; CHECK-NEON-NEXT:    eor v4.16b, v21.16b, v24.16b
+; CHECK-NEON-NEXT:    eor v21.16b, v25.16b, v31.16b
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v22.16b
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v17.16b
+; CHECK-NEON-NEXT:    mul x20, x8, x16
+; CHECK-NEON-NEXT:    mov x16, v23.d[1]
+; CHECK-NEON-NEXT:    fmov d17, x30
+; CHECK-NEON-NEXT:    eor v4.16b, v4.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov d16, x15
+; CHECK-NEON-NEXT:    mov x15, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #272] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v21.16b, v21.16b, v29.16b
+; CHECK-NEON-NEXT:    dup v24.2d, x15
+; CHECK-NEON-NEXT:    mov x4, v20.d[1]
+; CHECK-NEON-NEXT:    fmov x15, d20
+; CHECK-NEON-NEXT:    mov v18.d[1], x10
+; CHECK-NEON-NEXT:    ldr x10, [sp, #280] // 8-byte Reload
+; CHECK-NEON-NEXT:    mul x23, x9, x16
+; CHECK-NEON-NEXT:    eor v21.16b, v21.16b, v27.16b
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v6.16b
+; CHECK-NEON-NEXT:    mov v17.d[1], x25
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT:    mov x25, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    dup v22.2d, x10
+; CHECK-NEON-NEXT:    fmov x10, d23
+; CHECK-NEON-NEXT:    and v23.16b, v0.16b, v26.16b
+; CHECK-NEON-NEXT:    eor v3.16b, v21.16b, v3.16b
+; CHECK-NEON-NEXT:    mul x6, x9, x4
+; CHECK-NEON-NEXT:    ldr x4, [sp, #288] // 8-byte Reload
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    mov v16.d[1], x24
+; CHECK-NEON-NEXT:    fmov d4, x17
+; CHECK-NEON-NEXT:    mov x16, v23.d[1]
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v22.16b
+; CHECK-NEON-NEXT:    and v22.16b, v0.16b, v24.16b
+; CHECK-NEON-NEXT:    mov v19.d[1], x4
+; CHECK-NEON-NEXT:    fmov x4, d23
+; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v2.16b
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v18.16b
+; CHECK-NEON-NEXT:    fmov d23, x26
+; CHECK-NEON-NEXT:    fmov d18, x7
+; CHECK-NEON-NEXT:    mov x24, v20.d[1]
+; CHECK-NEON-NEXT:    fmov d6, x29
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mul x22, x9, x16
+; CHECK-NEON-NEXT:    mov x16, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    mov x14, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    dup v21.2d, x16
+; CHECK-NEON-NEXT:    mov x16, v22.d[1]
+; CHECK-NEON-NEXT:    eor v3.16b, v3.16b, v7.16b
+; CHECK-NEON-NEXT:    mul x17, x8, x4
+; CHECK-NEON-NEXT:    fmov x4, d20
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    dup v2.2d, x14
+; CHECK-NEON-NEXT:    mov v6.d[1], x18
+; CHECK-NEON-NEXT:    mov v5.d[1], x12
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v21.16b
+; CHECK-NEON-NEXT:    dup v21.2d, x25
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mov v4.d[1], x27
+; CHECK-NEON-NEXT:    ldp x29, x30, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x7, x9, x16
+; CHECK-NEON-NEXT:    mov x16, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    dup v7.2d, x16
+; CHECK-NEON-NEXT:    ldr x16, [sp, #264] // 8-byte Reload
+; CHECK-NEON-NEXT:    and v21.16b, v0.16b, v21.16b
+; CHECK-NEON-NEXT:    mov x26, v20.d[1]
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    mov v23.d[1], x16
+; CHECK-NEON-NEXT:    ldr x16, [sp, #256] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov x18, v21.d[1]
+; CHECK-NEON-NEXT:    mul x27, x8, x4
+; CHECK-NEON-NEXT:    fmov x4, d22
+; CHECK-NEON-NEXT:    mov v18.d[1], x16
+; CHECK-NEON-NEXT:    fmov x16, d20
+; CHECK-NEON-NEXT:    and v20.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    eor v7.16b, v3.16b, v19.16b
+; CHECK-NEON-NEXT:    and v19.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    fmov d2, x0
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mul x24, x9, x24
+; CHECK-NEON-NEXT:    mov x12, v20.d[1]
+; CHECK-NEON-NEXT:    fmov x0, d20
+; CHECK-NEON-NEXT:    fmov d20, x13
+; CHECK-NEON-NEXT:    mul x14, x8, x16
+; CHECK-NEON-NEXT:    fmov x16, d21
+; CHECK-NEON-NEXT:    eor v7.16b, v7.16b, v16.16b
+; CHECK-NEON-NEXT:    eor v16.16b, v17.16b, v23.16b
+; CHECK-NEON-NEXT:    fmov d17, x20
+; CHECK-NEON-NEXT:    mov x20, v19.d[1]
+; CHECK-NEON-NEXT:    mul x11, x9, x18
+; CHECK-NEON-NEXT:    mov x18, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    mov v20.d[1], x3
+; CHECK-NEON-NEXT:    dup v21.2d, x18
+; CHECK-NEON-NEXT:    mov v3.d[1], x21
+; CHECK-NEON-NEXT:    mov v2.d[1], x1
+; CHECK-NEON-NEXT:    mul x18, x8, x16
+; CHECK-NEON-NEXT:    mov v17.d[1], x2
+; CHECK-NEON-NEXT:    eor v16.16b, v16.16b, v18.16b
+; CHECK-NEON-NEXT:    mov x2, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    mov x3, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    eor v4.16b, v7.16b, v4.16b
+; CHECK-NEON-NEXT:    mul x16, x9, x12
+; CHECK-NEON-NEXT:    mov x12, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    and v21.16b, v0.16b, v21.16b
+; CHECK-NEON-NEXT:    dup v18.2d, x12
+; CHECK-NEON-NEXT:    fmov x12, d19
+; CHECK-NEON-NEXT:    fmov d19, x10
+; CHECK-NEON-NEXT:    eor v6.16b, v16.16b, v6.16b
+; CHECK-NEON-NEXT:    dup v16.2d, x2
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v20.16b
+; CHECK-NEON-NEXT:    fmov d20, x15
+; CHECK-NEON-NEXT:    mul x13, x8, x0
+; CHECK-NEON-NEXT:    mov x10, v21.d[1]
+; CHECK-NEON-NEXT:    mov v19.d[1], x23
+; CHECK-NEON-NEXT:    and v18.16b, v0.16b, v18.16b
+; CHECK-NEON-NEXT:    fmov x15, d21
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v16.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    fmov d21, x17
+; CHECK-NEON-NEXT:    mov v20.d[1], x6
+; CHECK-NEON-NEXT:    mov x17, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v5.16b
+; CHECK-NEON-NEXT:    mul x0, x9, x20
+; CHECK-NEON-NEXT:    mov x1, v18.d[1]
+; CHECK-NEON-NEXT:    dup v7.2d, x3
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v19.16b
+; CHECK-NEON-NEXT:    mov x2, v16.d[1]
+; CHECK-NEON-NEXT:    dup v19.2d, x17
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    mov v21.d[1], x22
+; CHECK-NEON-NEXT:    fmov x17, d18
+; CHECK-NEON-NEXT:    fmov d18, x27
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v3.16b
+; CHECK-NEON-NEXT:    and v7.16b, v0.16b, v7.16b
+; CHECK-NEON-NEXT:    eor v6.16b, v17.16b, v20.16b
+; CHECK-NEON-NEXT:    fmov d17, x12
+; CHECK-NEON-NEXT:    mul x25, x8, x4
+; CHECK-NEON-NEXT:    and v19.16b, v0.16b, v19.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v4.16b
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov v18.d[1], x24
+; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v2.16b
+; CHECK-NEON-NEXT:    mov v17.d[1], x0
+; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v21.16b
+; CHECK-NEON-NEXT:    movi v6.2d, #0000000000000000
+; CHECK-NEON-NEXT:    mul x0, x9, x2
+; CHECK-NEON-NEXT:    fmov x2, d16
+; CHECK-NEON-NEXT:    fmov v16.2d, #2.00000000
+; CHECK-NEON-NEXT:    fmov d20, x15
+; CHECK-NEON-NEXT:    mov x12, v19.d[1]
+; CHECK-NEON-NEXT:    fmov d21, x25
+; CHECK-NEON-NEXT:    mul x17, x8, x17
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v18.16b
+; CHECK-NEON-NEXT:    fneg v6.2d, v6.2d
+; CHECK-NEON-NEXT:    ldp x22, x21, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x15, x8, x2
+; CHECK-NEON-NEXT:    fmov x2, d19
+; CHECK-NEON-NEXT:    and v16.16b, v0.16b, v16.16b
+; CHECK-NEON-NEXT:    mov v20.d[1], x10
+; CHECK-NEON-NEXT:    mov x10, v7.d[1]
+; CHECK-NEON-NEXT:    mov v21.d[1], x7
+; CHECK-NEON-NEXT:    mul x1, x9, x1
+; CHECK-NEON-NEXT:    ldp x24, x23, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    fmov d19, x17
+; CHECK-NEON-NEXT:    and v0.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    mul x17, x8, x2
+; CHECK-NEON-NEXT:    ldr x14, [sp, #312] // 8-byte Reload
+; CHECK-NEON-NEXT:    fmov d18, x15
+; CHECK-NEON-NEXT:    mov x15, v16.d[1]
+; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v20.16b
+; CHECK-NEON-NEXT:    mul x4, x9, x26
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v21.16b
+; CHECK-NEON-NEXT:    mov v19.d[1], x1
+; CHECK-NEON-NEXT:    fmov x1, d7
+; CHECK-NEON-NEXT:    fmov d7, x19
+; CHECK-NEON-NEXT:    mul x12, x9, x12
+; CHECK-NEON-NEXT:    mov v18.d[1], x0
+; CHECK-NEON-NEXT:    ldp x20, x19, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x0, x8, x1
+; CHECK-NEON-NEXT:    mov v7.d[1], x14
+; CHECK-NEON-NEXT:    eor v5.16b, v17.16b, v19.16b
+; CHECK-NEON-NEXT:    fmov d17, x17
+; CHECK-NEON-NEXT:    fmov x17, d0
+; CHECK-NEON-NEXT:    mul x14, x9, x15
+; CHECK-NEON-NEXT:    fmov x15, d16
+; CHECK-NEON-NEXT:    mov v6.d[1], x4
+; CHECK-NEON-NEXT:    fmov d16, x5
+; CHECK-NEON-NEXT:    ldp x26, x25, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x10, x9, x10
+; CHECK-NEON-NEXT:    mov v17.d[1], x12
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v18.16b
+; CHECK-NEON-NEXT:    mov x12, v0.d[1]
+; CHECK-NEON-NEXT:    fmov d0, x18
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v7.16b
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    fmov d7, x0
+; CHECK-NEON-NEXT:    eor v3.16b, v3.16b, v6.16b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    ldp x28, x27, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    mul x8, x8, x17
+; CHECK-NEON-NEXT:    ldr x17, [sp, #296] // 8-byte Reload
+; CHECK-NEON-NEXT:    mov v0.d[1], x11
+; CHECK-NEON-NEXT:    mov v7.d[1], x10
+; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v17.16b
+; CHECK-NEON-NEXT:    mov v16.d[1], x17
+; CHECK-NEON-NEXT:    mul x9, x9, x12
+; CHECK-NEON-NEXT:    mov v6.d[1], x16
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    eor v0.16b, v3.16b, v0.16b
+; CHECK-NEON-NEXT:    fmov d4, x8
+; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v7.16b
+; CHECK-NEON-NEXT:    mov v17.d[1], x14
+; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v16.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v6.16b
+; CHECK-NEON-NEXT:    mov v4.d[1], x9
+; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v17.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    eor v1.16b, v2.16b, v4.16b
+; CHECK-NEON-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-NEON-NEXT:    rbit v0.16b, v0.16b
+; CHECK-NEON-NEXT:    ushr v0.2d, v0.2d, #1
+; CHECK-NEON-NEXT:    add sp, sp, #464
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-AES-LABEL: clmulh_v2i64_neon:
+; CHECK-AES:       // %bb.0:
+; CHECK-AES-NEXT:    rev64 v1.16b, v1.16b
+; CHECK-AES-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-AES-NEXT:    rbit v1.16b, v1.16b
+; CHECK-AES-NEXT:    rbit v0.16b, v0.16b
+; CHECK-AES-NEXT:    pmull2 v2.1q, v0.2d, v1.2d
+; CHECK-AES-NEXT:    pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-AES-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-AES-NEXT:    rbit v0.16b, v0.16b
+; CHECK-AES-NEXT:    ushr v0.2d, v0.2d, #1
+; CHECK-AES-NEXT:    ret
+  %a.ext = zext <2 x i64> %a to <2 x i128>
+  %b.ext = zext <2 x i64> %b to <2 x i128>
+  %clmul = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a.ext, <2 x i128> %b.ext)
+  %res.ext = lshr <2 x i128> %clmul, splat (i128 64)
+  %res = trunc <2 x i128> %res.ext to <2 x i64>
+  ret <2 x i64> %res
+}
 
-; TODO
-;define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
-;  %a.ext = zext <1 x i64> %a to <1 x i128>
-;  %b.ext = zext <1 x i64> %b to <1 x i128>
-;  %clmul = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %a.ext, <1 x i128> %b.ext)
-;  %res.ext = lshr <1 x i128> %clmul, splat (i128 64)
-;  %res = trunc <1 x i128> %res.ext to <1 x i64>
-;  ret <1 x i64> %res
-;}
+define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
+; CHECK-NEON-LABEL: clmulh_v1i64_neon:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    rev64 v1.8b, v1.8b
+; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEON-NEXT:    rev64 v2.8b, v0.8b
+; CHECK-NEON-NEXT:    mov w10, #8 // =0x8
+; CHECK-NEON-NEXT:    mov w11, #16 // =0x10
+; CHECK-NEON-NEXT:    mov w12, #32 // =0x20
+; CHECK-NEON-NEXT:    mov w13, #64 // =0x40
+; CHECK-NEON-NEXT:    mov w14, #128 // =0x80
+; CHECK-NEON-NEXT:    mov w15, #256 // =0x100
+; CHECK-NEON-NEXT:    rbit v0.8b, v1.8b
+; CHECK-NEON-NEXT:    fmov d1, x8
+; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEON-NEXT:    fmov d3, x8
+; CHECK-NEON-NEXT:    rbit v2.8b, v2.8b
+; CHECK-NEON-NEXT:    mov w8, #4 // =0x4
+; CHECK-NEON-NEXT:    fmov d4, x8
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v1.8b
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x8, d2
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    fmov x9, d1
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d4, x14
+; CHECK-NEON-NEXT:    mov w14, #512 // =0x200
+; CHECK-NEON-NEXT:    fmov x11, d1
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    fmov x12, d2
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    fmov x13, d1
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    fmov d3, x15
+; CHECK-NEON-NEXT:    fmov x14, d2
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mov w15, #1024 // =0x400
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov x9, d1
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    mov w11, #2048 // =0x800
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    fmov x12, d1
+; CHECK-NEON-NEXT:    fmov d1, x13
+; CHECK-NEON-NEXT:    mov w13, #4096 // =0x1000
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d4, x14
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mov w11, #8192 // =0x2000
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    mul x13, x8, x14
+; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov w11, #16384 // =0x4000
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    eor v1.8b, v2.8b, v1.8b
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov w9, #32768 // =0x8000
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mov w10, #65536 // =0x10000
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    mov w10, #131072 // =0x20000
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #262144 // =0x40000
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    mov w11, #524288 // =0x80000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov w11, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #2097152 // =0x200000
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    mov w10, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov x9, d6
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x9
+; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    mul x9, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #16777216 // =0x1000000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    fmov x15, d5
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x11, d7
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mul x12, x8, x15
+; CHECK-NEON-NEXT:    mov w15, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x15
+; CHECK-NEON-NEXT:    mov w15, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    movi v4.2s, #128, lsl #24
+; CHECK-NEON-NEXT:    mov w15, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x13, x8, x14
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x15
+; CHECK-NEON-NEXT:    mov w15, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    fmov x14, d6
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fneg d4, d4
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    fmov x15, d6
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x15
+; CHECK-NEON-NEXT:    eor v2.8b, v7.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x15, d16
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x10, x8, x15
+; CHECK-NEON-NEXT:    mov x15, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    mov x15, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov x13, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d17, x13
+; CHECK-NEON-NEXT:    fmov d16, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    mov x13, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    mov x14, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mov x9, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    eor v6.8b, v7.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mov x11, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mov x10, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov x13, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mov x14, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d6
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mov x11, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mul x12, x8, x10
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x9
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x9, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mov x11, #70368744177664 // =0x400000000000
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d16, x9
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x14
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    mov x15, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov d7, x15
+; CHECK-NEON-NEXT:    fmov x15, d6
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x15
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    fmov x11, d6
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    mov x13, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x15, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d7
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    fmov d17, x13
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    mul x14, x8, x10
+; CHECK-NEON-NEXT:    eor v7.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    fmov x12, d16
+; CHECK-NEON-NEXT:    fmov d16, x11
+; CHECK-NEON-NEXT:    fmov d18, x15
+; CHECK-NEON-NEXT:    mov x15, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    fmov x13, d17
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d17, x12
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    fmov x14, d16
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x11
+; CHECK-NEON-NEXT:    mul x13, x8, x14
+; CHECK-NEON-NEXT:    mov x14, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    fmov x11, d17
+; CHECK-NEON-NEXT:    fmov d17, x14
+; CHECK-NEON-NEXT:    mov x14, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov x14, d16
+; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d18, x12
+; CHECK-NEON-NEXT:    fmov x12, d17
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    mul x15, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov x12, d16
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v7.8b, v18.8b
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    fmov x13, d17
+; CHECK-NEON-NEXT:    fmov d16, x15
+; CHECK-NEON-NEXT:    mov x15, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    fmov x14, d3
+; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    mov x15, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d7, x15
+; CHECK-NEON-NEXT:    mov x15, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    movi d16, #0000000000000000
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d18, x13
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fneg d16, d16
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mov x14, #4611686018427387904 // =0x4000000000000000
+; CHECK-NEON-NEXT:    fmov x15, d7
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v18.8b
+; CHECK-NEON-NEXT:    mul x13, x8, x13
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    fmov x14, d17
+; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    and v0.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v17.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x14
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov x11, d7
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    fmov x11, d0
+; CHECK-NEON-NEXT:    fmov d0, x15
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mul x8, x8, x11
+; CHECK-NEON-NEXT:    eor v0.8b, v5.8b, v0.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d3, x8
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-NEON-NEXT:    rbit v0.8b, v0.8b
+; CHECK-NEON-NEXT:    ushr d0, d0, #1
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-AES-LABEL: clmulh_v1i64_neon:
+; CHECK-AES:       // %bb.0:
+; CHECK-AES-NEXT:    rev64 v1.8b, v1.8b
+; CHECK-AES-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-AES-NEXT:    rbit v1.8b, v1.8b
+; CHECK-AES-NEXT:    rbit v0.8b, v0.8b
+; CHECK-AES-NEXT:    pmull v0.1q, v0.1d, v1.1d
+; CHECK-AES-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-AES-NEXT:    rbit v0.8b, v0.8b
+; CHECK-AES-NEXT:    ushr d0, d0, #1
+; CHECK-AES-NEXT:    ret
+  %a.ext = zext <1 x i64> %a to <1 x i128>
+  %b.ext = zext <1 x i64> %b to <1 x i128>
+  %clmul = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %a.ext, <1 x i128> %b.ext)
+  %res.ext = lshr <1 x i128> %clmul, splat (i128 64)
+  %res = trunc <1 x i128> %res.ext to <1 x i64>
+  ret <1 x i64> %res
+}


        


More information about the llvm-commits mailing list