[llvm] b7cb18c - [AArch64][GISel] Expand test coverage of FPow.

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 12 02:08:15 PDT 2023


Author: David Green
Date: 2023-09-12T10:08:09+01:00
New Revision: b7cb18c5eb1a419625833d1dc14773b93f087c0d

URL: https://github.com/llvm/llvm-project/commit/b7cb18c5eb1a419625833d1dc14773b93f087c0d
DIFF: https://github.com/llvm/llvm-project/commit/b7cb18c5eb1a419625833d1dc14773b93f087c0d.diff

LOG: [AArch64][GISel] Expand test coverage of FPow.

This adds some more extensive test coverage for fpow intrinsics through global
isel, and removes the unused vector libcall types. All types get scalarized;
fp16 is expanded to fp32, and then we lower to a libcall from there.

Added: 
    llvm/test/CodeGen/AArch64/fpow.ll

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e2df8fb1321df83..923f937e162cba0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -283,7 +283,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .scalarize(0)
       // Regardless of FP16 support, widen 16-bit elements to 32-bits.
       .minScalar(0, s32)
-      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
+      .libcallFor({s32, s64});
 
   getActionDefinitionsBuilder(G_INSERT)
       .legalIf(all(typeInSet(0, {s32, s64, p0}),

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
index 887f94d41c4a4c6..0278128b25b62b6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -44,17 +44,6 @@ define %v4f16 @test_v4f16.cos(%v4f16 %a) {
   ret %v4f16 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v4f16.pow
-define %v4f16 @test_v4f16.pow(%v4f16 %a, %v4f16 %b) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.pow:
-  ; GISEL-LABEL:   test_v4f16.pow:
-  ; CHECK-COUNT-4: bl pow
-  ; GISEL-COUNT-4: bl pow
-  %1 = call %v4f16 @llvm.pow.v4f16(%v4f16 %a, %v4f16 %b)
-  ret %v4f16 %1
-}
-
 ; FALLBACK-NOT: remark{{.*}}test_v4f16.exp
 define %v4f16 @test_v4f16.exp(%v4f16 %a) {
   ; This operation is expanded, whether with or without +fullfp16.
@@ -123,7 +112,6 @@ define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
 declare %v4f16 @llvm.powi.v4f16.i32(%v4f16, i32) #0
 declare %v4f16 @llvm.sin.v4f16(%v4f16) #0
 declare %v4f16 @llvm.cos.v4f16(%v4f16) #0
-declare %v4f16 @llvm.pow.v4f16(%v4f16, %v4f16) #0
 declare %v4f16 @llvm.exp.v4f16(%v4f16) #0
 declare %v4f16 @llvm.exp2.v4f16(%v4f16) #0
 declare %v4f16 @llvm.log.v4f16(%v4f16) #0
@@ -167,17 +155,6 @@ define %v8f16 @test_v8f16.cos(%v8f16 %a) {
   ret %v8f16 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v8f16.pow
-define %v8f16 @test_v8f16.pow(%v8f16 %a, %v8f16 %b) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.pow:
-  ; CHECK-COUNT-8: bl pow
-  ; GISEL-LABEL:   test_v8f16.pow:
-  ; GISEL-COUNT-8: bl pow
-  %1 = call %v8f16 @llvm.pow.v8f16(%v8f16 %a, %v8f16 %b)
-  ret %v8f16 %1
-}
-
 ; FALLBACK-NOT: remark{{.*}}test_v8f16.exp
 define %v8f16 @test_v8f16.exp(%v8f16 %a) {
   ; This operation is expanded, whether with or without +fullfp16.
@@ -246,7 +223,6 @@ define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
 declare %v8f16 @llvm.powi.v8f16.i32(%v8f16, i32) #0
 declare %v8f16 @llvm.sin.v8f16(%v8f16) #0
 declare %v8f16 @llvm.cos.v8f16(%v8f16) #0
-declare %v8f16 @llvm.pow.v8f16(%v8f16, %v8f16) #0
 declare %v8f16 @llvm.exp.v8f16(%v8f16) #0
 declare %v8f16 @llvm.exp2.v8f16(%v8f16) #0
 declare %v8f16 @llvm.log.v8f16(%v8f16) #0
@@ -283,16 +259,6 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) {
   ret %v2f32 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v2f32.pow
-; CHECK: test_v2f32.pow:
-; GISEL-LABEL: test_v2f32.pow:
-define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
-  ; CHECK: pow
-  ; GISEL: pow
-  %1 = call %v2f32 @llvm.pow.v2f32(%v2f32 %a, %v2f32 %b)
-  ret %v2f32 %1
-}
-
 ; FALLBACK-NOT: remark{{.*}}test_v2f32.exp
 ; CHECK: test_v2f32.exp:
 ; GISEL: test_v2f32.exp:
@@ -351,7 +317,6 @@ define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
 declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
-declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
 declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
 declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
 declare %v2f32 @llvm.log.v2f32(%v2f32) #0
@@ -388,16 +353,6 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) {
   ret %v4f32 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v4f32.pow
-; CHECK: test_v4f32.pow:
-; GISEL-LABEL: test_v4f32.pow:
-define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
-  ; CHECK: pow
-  ; GISEL: pow
-  %1 = call %v4f32 @llvm.pow.v4f32(%v4f32 %a, %v4f32 %b)
-  ret %v4f32 %1
-}
-
 ; FALLBACK-NOT: remark{{.*}}test_v4f32.exp
 ; CHECK: test_v4f32.exp:
 ; GISEL: test_v4f32.exp:
@@ -455,7 +410,6 @@ define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) {
 declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
 declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
 declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
-declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
 declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
 declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
 declare %v4f32 @llvm.log.v4f32(%v4f32) #0
@@ -492,16 +446,6 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) {
   ret %v2f64 %1
 }
 
-; FALLBACK-NOT: remark{{.*}}test_v2f64.pow
-; CHECK: test_v2f64.pow:
-; GISEL-LABEL: test_v2f64.pow:
-define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
-  ; CHECK: pow
-  ; GISEL: pow
-  %1 = call %v2f64 @llvm.pow.v2f64(%v2f64 %a, %v2f64 %b)
-  ret %v2f64 %1
-}
-
 ; FALLBACK-NOT: remark{{.*}}test_v2f64.exp
 ; CHECK: test_v2f64.exp:
 ; GISEL: test_v2f64.exp:
@@ -560,7 +504,6 @@ define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) {
 declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
 declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
 declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
-declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
 declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
 declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
 declare %v2f64 @llvm.log.v2f64(%v2f64) #0

diff  --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll
new file mode 100644
index 000000000000000..9766e22199377e7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fpow.ll
@@ -0,0 +1,1593 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define double @pow_f64(double %a, double %b) {
+; CHECK-SD-LABEL: pow_f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    b pow
+;
+; CHECK-GI-LABEL: pow_f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call double @llvm.pow.f64(double %a, double %b)
+  ret double %c
+}
+
+define float @pow_f32(float %a, float %b) {
+; CHECK-SD-LABEL: pow_f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    b powf
+;
+; CHECK-GI-LABEL: pow_f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call float @llvm.pow.f32(float %a, float %b)
+  ret float %c
+}
+
+define half @pow_f16(half %a, half %b) {
+; CHECK-LABEL: pow_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    bl powf
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %c = call half @llvm.pow.f16(half %a, half %b)
+  ret half %c
+}
+
+define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-SD-LABEL: pow_v2f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov d0, v0.d[1]
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v2f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #48
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    fmov d1, d9
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    add sp, sp, #48
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <2 x double> @llvm.pow.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %c
+}
+
+define <3 x double> @pow_v3f64(<3 x double> %a, <3 x double> %b) {
+; CHECK-SD-LABEL: pow_v3f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str d12, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp d11, d10, [sp, #8] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT:    .cfi_offset w30, -8
+; CHECK-SD-NEXT:    .cfi_offset b8, -16
+; CHECK-SD-NEXT:    .cfi_offset b9, -24
+; CHECK-SD-NEXT:    .cfi_offset b10, -32
+; CHECK-SD-NEXT:    .cfi_offset b11, -40
+; CHECK-SD-NEXT:    .cfi_offset b12, -48
+; CHECK-SD-NEXT:    fmov d11, d1
+; CHECK-SD-NEXT:    fmov d1, d3
+; CHECK-SD-NEXT:    fmov d8, d5
+; CHECK-SD-NEXT:    fmov d9, d4
+; CHECK-SD-NEXT:    fmov d10, d2
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    fmov d12, d0
+; CHECK-SD-NEXT:    fmov d0, d11
+; CHECK-SD-NEXT:    fmov d1, d9
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    fmov d9, d0
+; CHECK-SD-NEXT:    fmov d0, d10
+; CHECK-SD-NEXT:    fmov d1, d8
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    fmov d1, d9
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d11, d10, [sp, #8] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fmov d0, d12
+; CHECK-SD-NEXT:    ldr d12, [sp], #48 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v3f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    str d12, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #8] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT:    .cfi_offset w30, -8
+; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    .cfi_offset b9, -24
+; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    .cfi_offset b11, -40
+; CHECK-GI-NEXT:    .cfi_offset b12, -48
+; CHECK-GI-NEXT:    fmov d8, d1
+; CHECK-GI-NEXT:    fmov d1, d3
+; CHECK-GI-NEXT:    fmov d9, d2
+; CHECK-GI-NEXT:    fmov d10, d4
+; CHECK-GI-NEXT:    fmov d11, d5
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    fmov d12, d0
+; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    fmov d1, d10
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    fmov d8, d0
+; CHECK-GI-NEXT:    fmov d0, d9
+; CHECK-GI-NEXT:    fmov d1, d11
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    fmov d1, d8
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #8] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d0, d12
+; CHECK-GI-NEXT:    ldr d12, [sp], #48 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <3 x double> @llvm.pow.v3f64(<3 x double> %a, <3 x double> %b)
+  ret <3 x double> %c
+}
+
+define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-SD-LABEL: pow_v4f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    stp q0, q2, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov d0, v0.d[1]
+; CHECK-SD-NEXT:    stp q1, q3, [sp, #48] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov d0, v0.d[1]
+; CHECK-SD-NEXT:    mov d1, v1.d[1]
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    ldp q2, q0, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.d[1], v2.d[0]
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v4f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #112
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    mov v4.16b, v1.16b
+; CHECK-GI-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov v1.16b, v2.16b
+; CHECK-GI-NEXT:    str q3, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    mov d10, v2.d[1]
+; CHECK-GI-NEXT:    mov d11, v3.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    mov d9, v4.d[1]
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    fmov d1, d10
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov d0, d9
+; CHECK-GI-NEXT:    fmov d1, d11
+; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NEXT:    add sp, sp, #112
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <4 x double> @llvm.pow.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %c
+}
+
+define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-SD-LABEL: pow_v2f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    mov s1, v1.s[1]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v2f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #48
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    mov s9, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s9
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
+; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    add sp, sp, #48
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <2 x float> @llvm.pow.v2f32(<2 x float> %a, <2 x float> %b)
+  ret <2 x float> %c
+}
+
+define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
+; CHECK-SD-LABEL: pow_v3f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    mov s1, v1.s[1]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[2]
+; CHECK-SD-NEXT:    mov s1, v1.s[2]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v3f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #80
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    mov s10, v1.s[1]
+; CHECK-GI-NEXT:    mov s11, v1.s[2]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s10
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT:    mov v1.s[2], v0.s[0]
+; CHECK-GI-NEXT:    mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    add sp, sp, #80
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <3 x float> @llvm.pow.v3f32(<3 x float> %a, <3 x float> %b)
+  ret <3 x float> %c
+}
+
+define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-SD-LABEL: pow_v4f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    mov s1, v1.s[1]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[2]
+; CHECK-SD-NEXT:    mov s1, v1.s[2]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[3]
+; CHECK-SD-NEXT:    mov s1, v1.s[3]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
+; CHECK-SD-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v4f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #112
+; CHECK-GI-NEXT:    stp d13, d12, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    .cfi_offset b12, -56
+; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    mov s12, v1.s[2]
+; CHECK-GI-NEXT:    mov s13, v1.s[3]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    add sp, sp, #112
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <4 x float> @llvm.pow.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %c
+}
+
+define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-SD-LABEL: pow_v8f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    stp q0, q2, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    stp q1, q3, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov s1, v2.s[1]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[2]
+; CHECK-SD-NEXT:    mov s1, v1.s[2]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[3]
+; CHECK-SD-NEXT:    mov s1, v1.s[3]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    mov s1, v1.s[1]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[2]
+; CHECK-SD-NEXT:    mov s1, v1.s[2]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov s0, v0.s[3]
+; CHECK-SD-NEXT:    mov s1, v1.s[3]
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fmov s2, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.s[3], v2.s[0]
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v8f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #208
+; CHECK-GI-NEXT:    stp d15, d14, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d13, d12, [sp, #144] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #160] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #176] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #192] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 208
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    .cfi_offset b12, -56
+; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    .cfi_offset b14, -72
+; CHECK-GI-NEXT:    .cfi_offset b15, -80
+; CHECK-GI-NEXT:    mov v4.16b, v1.16b
+; CHECK-GI-NEXT:    mov v1.16b, v2.16b
+; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    mov s12, v3.s[1]
+; CHECK-GI-NEXT:    mov s11, v3.s[2]
+; CHECK-GI-NEXT:    mov s2, v4.s[1]
+; CHECK-GI-NEXT:    stp q3, q4, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT:    mov s5, v4.s[3]
+; CHECK-GI-NEXT:    mov s14, v1.s[1]
+; CHECK-GI-NEXT:    mov s15, v1.s[2]
+; CHECK-GI-NEXT:    mov s13, v1.s[3]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-GI-NEXT:    str s2, [sp, #64] // 4-byte Folded Spill
+; CHECK-GI-NEXT:    mov s2, v4.s[2]
+; CHECK-GI-NEXT:    str s2, [sp, #112] // 4-byte Folded Spill
+; CHECK-GI-NEXT:    mov s2, v3.s[3]
+; CHECK-GI-NEXT:    stp s2, s5, [sp, #200] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s14
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s15
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    str d0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr s0, [sp, #64] // 4-byte Folded Reload
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    str d0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr s0, [sp, #112] // 4-byte Folded Reload
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    str d0, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldp s1, s0, [sp, #200] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldp q3, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #176] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #192] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #160] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #144] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.s[1], v2.s[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.s[2], v2.s[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.s[3], v2.s[0]
+; CHECK-GI-NEXT:    mov v3.s[3], v0.s[0]
+; CHECK-GI-NEXT:    mov v2.16b, v1.16b
+; CHECK-GI-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NEXT:    add sp, sp, #208
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <8 x float> @llvm.pow.v8f32(<8 x float> %a, <8 x float> %b)
+  ret <8 x float> %c
+}
+
+define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) {
+; CHECK-SD-LABEL: pow_v7f16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s0, h3
+; CHECK-SD-NEXT:    fcvt s1, h2
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    mov h1, v1.h[2]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    mov h1, v1.h[3]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[4]
+; CHECK-SD-NEXT:    mov h1, v1.h[4]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[5]
+; CHECK-SD-NEXT:    mov h1, v1.h[5]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[6]
+; CHECK-SD-NEXT:    mov h1, v1.h[6]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h1, s0
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.h[7], v1.h[0]
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v7f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #176
+; CHECK-GI-NEXT:    stp d15, d14, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d13, d12, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #144] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #160] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    .cfi_offset b12, -56
+; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    .cfi_offset b14, -72
+; CHECK-GI-NEXT:    .cfi_offset b15, -80
+; CHECK-GI-NEXT:    mov h2, v0.h[5]
+; CHECK-GI-NEXT:    mov h8, v0.h[1]
+; CHECK-GI-NEXT:    mov h9, v0.h[2]
+; CHECK-GI-NEXT:    mov h10, v0.h[3]
+; CHECK-GI-NEXT:    mov h11, v0.h[4]
+; CHECK-GI-NEXT:    mov h14, v1.h[1]
+; CHECK-GI-NEXT:    mov h15, v1.h[2]
+; CHECK-GI-NEXT:    mov h13, v1.h[3]
+; CHECK-GI-NEXT:    mov h12, v1.h[4]
+; CHECK-GI-NEXT:    str h2, [sp, #64] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v0.h[6]
+; CHECK-GI-NEXT:    fcvt s0, h0
+; CHECK-GI-NEXT:    str h2, [sp, #80] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v1.h[5]
+; CHECK-GI-NEXT:    str h2, [sp, #172] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v1.h[6]
+; CHECK-GI-NEXT:    fcvt s1, h1
+; CHECK-GI-NEXT:    str h2, [sp, #174] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h8
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h14
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h9
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h15
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h10
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h13
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h11
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h12
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #64] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #172] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #80] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #174] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldp q2, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    add sp, sp, #176
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <7 x half> @llvm.pow.v7f16(<7 x half> %a, <7 x half> %b)
+  ret <7 x half> %c
+}
+
+define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-SD-LABEL: pow_v4f16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s0, h3
+; CHECK-SD-NEXT:    fcvt s1, h2
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    fcvt s2, h1
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt s1, h0
+; CHECK-SD-NEXT:    fmov s0, s2
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt h3, s0
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[2]
+; CHECK-SD-NEXT:    mov h2, v0.h[2]
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v3.h[1], v0.h[0]
+; CHECK-SD-NEXT:    fcvt s0, h1
+; CHECK-SD-NEXT:    fcvt s1, h2
+; CHECK-SD-NEXT:    str q3, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q3, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[3]
+; CHECK-SD-NEXT:    mov h2, v2.h[3]
+; CHECK-SD-NEXT:    mov v3.h[2], v0.h[0]
+; CHECK-SD-NEXT:    fcvt s0, h1
+; CHECK-SD-NEXT:    fcvt s1, h2
+; CHECK-SD-NEXT:    str q3, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h1, s0
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v4f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #112
+; CHECK-GI-NEXT:    stp d13, d12, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    .cfi_offset b12, -56
+; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov h8, v0.h[1]
+; CHECK-GI-NEXT:    mov h9, v0.h[2]
+; CHECK-GI-NEXT:    mov h10, v0.h[3]
+; CHECK-GI-NEXT:    mov h11, v1.h[1]
+; CHECK-GI-NEXT:    mov h12, v1.h[2]
+; CHECK-GI-NEXT:    mov h13, v1.h[3]
+; CHECK-GI-NEXT:    fcvt s0, h0
+; CHECK-GI-NEXT:    fcvt s1, h1
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h8
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h11
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h9
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h12
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h10
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h13
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add sp, sp, #112
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <4 x half> @llvm.pow.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %c
+}
+
+define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-SD-LABEL: pow_v8f16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    fcvt s0, h3
+; CHECK-SD-NEXT:    fcvt s1, h2
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    mov h1, v1.h[2]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    mov h1, v1.h[3]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[4]
+; CHECK-SD-NEXT:    mov h1, v1.h[4]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[5]
+; CHECK-SD-NEXT:    mov h1, v1.h[5]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[6]
+; CHECK-SD-NEXT:    mov h1, v1.h[6]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h1, s0
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.h[7], v1.h[0]
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v8f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #192
+; CHECK-GI-NEXT:    stp d15, d14, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d13, d12, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #144] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #160] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    str x30, [sp, #176] // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    .cfi_offset b12, -56
+; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    .cfi_offset b14, -72
+; CHECK-GI-NEXT:    .cfi_offset b15, -80
+; CHECK-GI-NEXT:    mov h2, v0.h[5]
+; CHECK-GI-NEXT:    mov h10, v0.h[1]
+; CHECK-GI-NEXT:    mov h11, v0.h[2]
+; CHECK-GI-NEXT:    mov h12, v0.h[3]
+; CHECK-GI-NEXT:    mov h13, v0.h[4]
+; CHECK-GI-NEXT:    mov h8, v1.h[1]
+; CHECK-GI-NEXT:    mov h9, v1.h[2]
+; CHECK-GI-NEXT:    mov h15, v1.h[3]
+; CHECK-GI-NEXT:    mov h14, v1.h[4]
+; CHECK-GI-NEXT:    str h2, [sp, #48] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v0.h[6]
+; CHECK-GI-NEXT:    str h2, [sp, #80] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v0.h[7]
+; CHECK-GI-NEXT:    fcvt s0, h0
+; CHECK-GI-NEXT:    str h2, [sp, #96] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v1.h[5]
+; CHECK-GI-NEXT:    str h2, [sp, #186] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v1.h[6]
+; CHECK-GI-NEXT:    str h2, [sp, #188] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h2, v1.h[7]
+; CHECK-GI-NEXT:    fcvt s1, h1
+; CHECK-GI-NEXT:    str h2, [sp, #190] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h10
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h8
+; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h11
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h9
+; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h12
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h15
+; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h13
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h14
+; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #48] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #186] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #80] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #188] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #96] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #190] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #160] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #176] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #144] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[6], v2.h[0]
+; CHECK-GI-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    add sp, sp, #192
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <8 x half> @llvm.pow.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %c
+}
+
+define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) {
+; CHECK-SD-LABEL: pow_v16f16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov h1, v2.h[1]
+; CHECK-SD-NEXT:    stp q3, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    fcvt s0, h3
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[2]
+; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[3]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[4]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[4]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[5]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[5]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[6]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[6]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[1]
+; CHECK-SD-NEXT:    mov h1, v1.h[1]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[2]
+; CHECK-SD-NEXT:    mov h1, v1.h[2]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[3]
+; CHECK-SD-NEXT:    mov h1, v1.h[3]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[4]
+; CHECK-SD-NEXT:    mov h1, v1.h[4]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[5]
+; CHECK-SD-NEXT:    mov h1, v1.h[5]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[6]
+; CHECK-SD-NEXT:    mov h1, v1.h[6]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fcvt h0, s0
+; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NEXT:    fcvt s0, h0
+; CHECK-SD-NEXT:    fcvt s1, h1
+; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    fmov s1, s0
+; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-SD-NEXT:    fcvt h2, s1
+; CHECK-SD-NEXT:    ldp q1, q0, [sp, #48] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    mov v1.h[7], v2.h[0]
+; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: pow_v16f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #448
+; CHECK-GI-NEXT:    stp d15, d14, [sp, #368] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d13, d12, [sp, #384] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d11, d10, [sp, #400] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp d9, d8, [sp, #416] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    stp x29, x30, [sp, #432] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 448
+; CHECK-GI-NEXT:    .cfi_offset w30, -8
+; CHECK-GI-NEXT:    .cfi_offset w29, -16
+; CHECK-GI-NEXT:    .cfi_offset b8, -24
+; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    .cfi_offset b10, -40
+; CHECK-GI-NEXT:    .cfi_offset b11, -48
+; CHECK-GI-NEXT:    .cfi_offset b12, -56
+; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    .cfi_offset b14, -72
+; CHECK-GI-NEXT:    .cfi_offset b15, -80
+; CHECK-GI-NEXT:    mov h4, v0.h[4]
+; CHECK-GI-NEXT:    str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov h11, v0.h[1]
+; CHECK-GI-NEXT:    mov h12, v0.h[2]
+; CHECK-GI-NEXT:    mov h13, v0.h[3]
+; CHECK-GI-NEXT:    str q3, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    mov h15, v2.h[1]
+; CHECK-GI-NEXT:    mov h8, v2.h[2]
+; CHECK-GI-NEXT:    mov h9, v2.h[3]
+; CHECK-GI-NEXT:    mov h10, v2.h[4]
+; CHECK-GI-NEXT:    mov h14, v2.h[5]
+; CHECK-GI-NEXT:    str h4, [sp, #288] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h4, v0.h[5]
+; CHECK-GI-NEXT:    str h4, [sp, #240] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h4, v0.h[6]
+; CHECK-GI-NEXT:    str h4, [sp, #176] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h4, v0.h[7]
+; CHECK-GI-NEXT:    fcvt s0, h0
+; CHECK-GI-NEXT:    str h4, [sp, #144] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov v4.16b, v1.16b
+; CHECK-GI-NEXT:    mov h1, v1.h[1]
+; CHECK-GI-NEXT:    str h1, [sp, #48] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v4.h[2]
+; CHECK-GI-NEXT:    str h1, [sp, #80] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v4.h[3]
+; CHECK-GI-NEXT:    str h1, [sp, #128] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v4.h[4]
+; CHECK-GI-NEXT:    str h1, [sp, #192] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v4.h[5]
+; CHECK-GI-NEXT:    str h1, [sp, #256] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v4.h[6]
+; CHECK-GI-NEXT:    str h1, [sp, #320] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v4.h[7]
+; CHECK-GI-NEXT:    str h1, [sp, #352] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v2.h[6]
+; CHECK-GI-NEXT:    str h1, [sp, #12] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v2.h[7]
+; CHECK-GI-NEXT:    str h1, [sp, #14] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v3.h[1]
+; CHECK-GI-NEXT:    str h1, [sp, #44] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v3.h[2]
+; CHECK-GI-NEXT:    str h1, [sp, #46] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v3.h[3]
+; CHECK-GI-NEXT:    str h1, [sp, #78] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v3.h[4]
+; CHECK-GI-NEXT:    str h1, [sp, #110] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v3.h[5]
+; CHECK-GI-NEXT:    str h1, [sp, #174] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v3.h[6]
+; CHECK-GI-NEXT:    str h1, [sp, #222] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    mov h1, v3.h[7]
+; CHECK-GI-NEXT:    str h1, [sp, #286] // 2-byte Folded Spill
+; CHECK-GI-NEXT:    fcvt s1, h2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h11
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h15
+; CHECK-GI-NEXT:    str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h12
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h8
+; CHECK-GI-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    fcvt s2, h13
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s1, h9
+; CHECK-GI-NEXT:    str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #288] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    fcvt s1, h10
+; CHECK-GI-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #240] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    fcvt s1, h14
+; CHECK-GI-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #176] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #12] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #144] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr q1, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #48] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #44] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #80] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #46] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #128] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #78] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #192] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #110] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #256] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #174] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #320] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #222] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr h1, [sp, #352] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt h0, s0
+; CHECK-GI-NEXT:    fcvt s2, h1
+; CHECK-GI-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    ldr h0, [sp, #286] // 2-byte Folded Reload
+; CHECK-GI-NEXT:    fcvt s1, h0
+; CHECK-GI-NEXT:    fmov s0, s2
+; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    ldr q1, [sp, #304] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x29, x30, [sp, #432] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #336] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[4], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #176] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
+; CHECK-GI-NEXT:    ldr q2, [sp, #320] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[5], v2.h[0]
+; CHECK-GI-NEXT:    fcvt h2, s0
+; CHECK-GI-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[6], v0.h[0]
+; CHECK-GI-NEXT:    ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[6], v0.h[0]
+; CHECK-GI-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v1.h[7], v0.h[0]
+; CHECK-GI-NEXT:    mov v3.h[7], v2.h[0]
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    mov v1.16b, v3.16b
+; CHECK-GI-NEXT:    add sp, sp, #448
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <16 x half> @llvm.pow.v16f16(<16 x half> %a, <16 x half> %b)
+  ret <16 x half> %c
+}
+
+declare <16 x half> @llvm.pow.v16f16(<16 x half>, <16 x half>)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
+declare <3 x double> @llvm.pow.v3f64(<3 x double>, <3 x double>)
+declare <3 x float> @llvm.pow.v3f32(<3 x float>, <3 x float>)
+declare <4 x double> @llvm.pow.v4f64(<4 x double>, <4 x double>)
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare <4 x half> @llvm.pow.v4f16(<4 x half>, <4 x half>)
+declare <7 x half> @llvm.pow.v7f16(<7 x half>, <7 x half>)
+declare <8 x float> @llvm.pow.v8f32(<8 x float>, <8 x float>)
+declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
+declare double @llvm.pow.f64(double, double)
+declare float @llvm.pow.f32(float, float)
+declare half @llvm.pow.f16(half, half)


        


More information about the llvm-commits mailing list