[llvm] a947be5 - [ARM] Various tests for MVE and FP16 codegen. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 24 04:12:20 PDT 2020
Author: David Green
Date: 2020-04-24T12:11:46+01:00
New Revision: a947be51bdaf3cb87e58a93126fa0f577865e7f1
URL: https://github.com/llvm/llvm-project/commit/a947be51bdaf3cb87e58a93126fa0f577865e7f1
DIFF: https://github.com/llvm/llvm-project/commit/a947be51bdaf3cb87e58a93126fa0f577865e7f1.diff
LOG: [ARM] Various tests for MVE and FP16 codegen. NFC
Added:
Modified:
llvm/test/CodeGen/ARM/fp16-bitcast.ll
llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
llvm/test/CodeGen/Thumb2/mve-vdup.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/ARM/fp16-bitcast.ll b/llvm/test/CodeGen/ARM/fp16-bitcast.ll
index 6d6b809faac8..e1fdf88856a6 100644
--- a/llvm/test/CodeGen/ARM/fp16-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/fp16-bitcast.ll
@@ -47,3 +47,41 @@ entry:
%tmp4.0.insert.ext = zext i16 %2 to i32
ret i32 %tmp4.0.insert.ext
}
+
+define half @load_i16(i16 *%hp) { ; Loads an i16, reinterprets its bits as half, adds 1.0 and returns the half.
+; CHECK-VFPV4-LABEL: load_i16:
+; CHECK-VFPV4: @ %bb.0: @ %entry
+; CHECK-VFPV4-NEXT: vmov.f32 s0, #1.000000e+00
+; CHECK-VFPV4-NEXT: ldrh r0, [r0]
+; CHECK-VFPV4-NEXT: vmov s2, r0
+; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-VFPV4-NEXT: vadd.f32 s0, s2, s0
+; CHECK-VFPV4-NEXT: vmov r0, s0
+; CHECK-VFPV4-NEXT: bx lr
+;
+; CHECK-FP16-LABEL: load_i16:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vldr.16 s2, [r1]
+; CHECK-FP16-NEXT: vmov.f16 s0, #1.000000e+00
+; CHECK-FP16-NEXT: vadd.f16 s0, s2, s0
+; CHECK-FP16-NEXT: vstr.16 s0, [r0]
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %h = load i16, i16 *%hp, align 2
+ %hc = bitcast i16 %h to half ; same 16 bits viewed as half; not an integer-to-float conversion
+ %add = fadd half %hc, 1.0
+ ret half %add
+}
+
+define i16 @load_f16(half *%hp) { ; Loads a half, reinterprets its bits as i16, adds 1 and returns the i16.
+; CHECK-LABEL: load_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %h = load half, half *%hp, align 2
+ %hc = bitcast half %h to i16 ; raw bits of the half; not a float-to-integer conversion
+ %add = add i16 %hc, 1
+ ret i16 %add
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll b/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
index da07cd676946..4c69761f4f20 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
define arm_aapcs_vfpcc <4 x i32> @vaddqr_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vaddqr_v4i32:
@@ -72,3 +72,123 @@ entry:
%c = add <16 x i8> %sp, %src
ret <16 x i8> %c
}
+
+define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; fadd of %src with a float splat of %src2 as the right-hand operand; %a and %b are unused.
+; CHECK-LABEL: vaddqr_v4f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vadd.f32 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0 ; place the scalar in lane 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %c = fadd <4 x float> %src, %sp
+ ret <4 x float> %c
+}
+
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) { ; fadd of %src with a half splat of the value loaded from %src2p as the right-hand operand; %a and %b are unused.
+; CHECK-LABEL: vaddqr_v8f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: vadd.f16 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %src2 = load half, half *%src2p, align 2
+ %i = insertelement <8 x half> undef, half %src2, i32 0 ; place the scalar in lane 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %c = fadd <8 x half> %src, %sp
+ ret <8 x half> %c
+}
+
+define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_2(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; Same float splat as vaddqr_v4f32, but the splat is the left-hand fadd operand.
+; CHECK-LABEL: vaddqr_v4f32_2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vadd.f32 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0 ; place the scalar in lane 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %c = fadd <4 x float> %sp, %src ; operands swapped relative to vaddqr_v4f32
+ ret <4 x float> %c
+}
+
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) { ; Same half splat as vaddqr_v8f16, but the splat is the left-hand fadd operand.
+; CHECK-LABEL: vaddqr_v8f16_2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: vadd.f16 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %src2 = load half, half *%src2p, align 2
+ %i = insertelement <8 x half> undef, half %src2, i32 0 ; place the scalar in lane 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %c = fadd <8 x half> %sp, %src ; operands swapped relative to vaddqr_v8f16
+ ret <8 x half> %c
+}
+
+define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_3(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; fadd of %src with a splat of %src2 that is built as <4 x i32> and bitcast back to float; splat is the right-hand operand.
+; CHECK-LABEL: vaddqr_v4f32_3:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vdup.32 q1, r0
+; CHECK-NEXT: vadd.f32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %src2bc = bitcast float %src2 to i32 ; reinterpret the float's bits as i32
+ %i = insertelement <4 x i32> undef, i32 %src2bc, i32 0 ; place the scalar in lane 0
+ %spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %sp = bitcast <4 x i32> %spbc to <4 x float> ; view the integer splat as a float vector again
+ %c = fadd <4 x float> %src, %sp
+ ret <4 x float> %c
+}
+
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) { ; fadd of %src with a splat of the loaded half that is built as <8 x i16> and bitcast back to half; splat is the right-hand operand.
+; CHECK-LABEL: vaddqr_v8f16_3:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: vdup.16 q1, r0
+; CHECK-NEXT: vadd.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %src2 = load half, half *%src2p, align 2
+ %src2bc = bitcast half %src2 to i16 ; reinterpret the half's bits as i16
+ %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0 ; place the scalar in lane 0
+ %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %sp = bitcast <8 x i16> %spbc to <8 x half> ; view the integer splat as a half vector again
+ %c = fadd <8 x half> %src, %sp
+ ret <8 x half> %c
+}
+
+define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_4(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; Same integer-domain splat as vaddqr_v4f32_3, but the splat is the left-hand fadd operand.
+; CHECK-LABEL: vaddqr_v4f32_4:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vdup.32 q1, r0
+; CHECK-NEXT: vadd.f32 q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+ %src2bc = bitcast float %src2 to i32 ; reinterpret the float's bits as i32
+ %i = insertelement <4 x i32> undef, i32 %src2bc, i32 0 ; place the scalar in lane 0
+ %spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %sp = bitcast <4 x i32> %spbc to <4 x float> ; view the integer splat as a float vector again
+ %c = fadd <4 x float> %sp, %src ; operands swapped relative to vaddqr_v4f32_3
+ ret <4 x float> %c
+}
+
+define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) { ; Same integer-domain splat as vaddqr_v8f16_3, but the splat is the left-hand fadd operand.
+; CHECK-LABEL: vaddqr_v8f16_4:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: vdup.16 q1, r0
+; CHECK-NEXT: vadd.f16 q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+ %src2 = load half, half *%src2p, align 2
+ %src2bc = bitcast half %src2 to i16 ; reinterpret the half's bits as i16
+ %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0 ; place the scalar in lane 0
+ %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %sp = bitcast <8 x i16> %spbc to <8 x half> ; view the integer splat as a half vector again
+ %c = fadd <8 x half> %sp, %src ; operands swapped relative to vaddqr_v8f16_3
+ ret <8 x half> %c
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index 1c6c0ff9c2d9..f03034c42baf 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -5192,3 +5192,138 @@ entry:
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
+
+
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { ; fcmp oeq of %src against a splat of the loaded half (splat built as <8 x i16> and bitcast back), then a per-lane select between %a and %b.
+; CHECK-MVE-LABEL: vcmp_oeq_v8f16_bc:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-MVE-NEXT: ldrh r0, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vdup.16 q4, r0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s16
+; CHECK-MVE-NEXT: vmovx.f16 s22, s17
+; CHECK-MVE-NEXT: vcmp.f16 s12, s14
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s0, s16
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s1, s17
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s20, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s20
+; CHECK-MVE-NEXT: vmovx.f16 s20, s1
+; CHECK-MVE-NEXT: vcmp.f16 s20, s22
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s20, s5
+; CHECK-MVE-NEXT: vmovx.f16 s22, s9
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
+; CHECK-MVE-NEXT: vcmp.f16 s2, s18
+; CHECK-MVE-NEXT: vmov r0, s20
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s22, s18
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s20, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s20
+; CHECK-MVE-NEXT: vmovx.f16 s20, s2
+; CHECK-MVE-NEXT: vcmp.f16 s20, s22
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s20, s6
+; CHECK-MVE-NEXT: vmovx.f16 s22, s10
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
+; CHECK-MVE-NEXT: vcmp.f16 s3, s19
+; CHECK-MVE-NEXT: vmov r0, s20
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s2, s19
+; CHECK-MVE-NEXT: vcmp.f16 s0, s2
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s20, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r1, #1
+; CHECK-MVE-NEXT: vmov r0, s20
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16_bc:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: ldrh r0, [r0]
+; CHECK-MVEFP-NEXT: vdup.16 q3, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %src2bc = bitcast half %src2 to i16 ; reinterpret the half's bits as i16
+ %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0 ; place the scalar in lane 0
+ %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %sp = bitcast <8 x i16> %spbc to <8 x half> ; view the integer splat as a half vector again
+ %c = fcmp oeq <8 x half> %src, %sp ; ordered-equal compare, one i1 per lane
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b ; lane from %a where the compare is true, otherwise from %b
+ ret <8 x half> %s
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
index f855b121590c..ae91b52e1d54 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -75,6 +75,36 @@ entry:
ret <4 x float> %out
}
+define arm_aapcs_vfpcc <4 x float> @vdup_f32_1bc(float %src) { ; Splat of a float argument built in the integer domain: bitcast to i32, broadcast, bitcast back to <4 x float>.
+; CHECK-LABEL: vdup_f32_1bc:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %srcbc = bitcast float %src to i32 ; reinterpret the float's bits as i32
+ %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0 ; place the scalar in lane 0
+ %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %outbc = bitcast <4 x i32> %out to <4 x float> ; view the integer splat as a float vector again
+ ret <4 x float> %outbc
+}
+
+define arm_aapcs_vfpcc <4 x float> @vdup_f32_2bc(float %src1, float %src2) { ; Like vdup_f32_1bc, but the splatted scalar is the result of an fadd rather than a plain argument.
+; CHECK-LABEL: vdup_f32_2bc:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vadd.f32 s0, s0, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = fadd float %src1, %src2
+ %bc = bitcast float %0 to i32 ; reinterpret the sum's bits as i32
+ %1 = insertelement <4 x i32> undef, i32 %bc, i32 0 ; place the scalar in lane 0
+ %out = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %outbc = bitcast <4 x i32> %out to <4 x float> ; view the integer splat as a float vector again
+ ret <4 x float> %outbc
+}
+
; TODO: Calling convention needs fixing to pass half types directly to functions
define arm_aapcs_vfpcc <8 x half> @vdup_f16(half* %src1, half* %src2) {
; CHECK-LABEL: vdup_f16:
@@ -94,6 +124,30 @@ entry:
ret <8 x half> %out
}
+define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half* %src1, half* %src2) { ; Adds two loaded halves and splats the sum via the integer domain: bitcast to i16, broadcast, bitcast back to <8 x half>.
+; CHECK-LABEL: vdup_f16_bc:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vldr.16 s0, [r1]
+; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vadd.f16 s0, s2, s0
+; CHECK-NEXT: vstr.16 s0, [sp, #2]
+; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vdup.16 q0, r0
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = load half, half *%src1, align 2
+ %1 = load half, half *%src2, align 2
+ %2 = fadd half %0, %1
+ %bc = bitcast half %2 to i16 ; reinterpret the sum's bits as i16
+ %3 = insertelement <8 x i16> undef, i16 %bc, i32 0 ; place the scalar in lane 0
+ %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %outbc = bitcast <8 x i16> %out to <8 x half> ; view the integer splat as a half vector again
+ ret <8 x half> %outbc
+}
+
define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
; CHECK-LABEL: vdup_f64:
; CHECK: @ %bb.0: @ %entry
@@ -185,3 +239,46 @@ entry:
%out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
ret <2 x double> %out
}
+
+
+define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) { ; Splats a float via the integer domain (as in vdup_f32_1bc) and then returns lane 2 of the splat.
+; CHECK-LABEL: vdup_f32_extract:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: bx lr
+entry:
+ %srcbc = bitcast float %src to i32 ; reinterpret the float's bits as i32
+ %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0 ; place the scalar in lane 0
+ %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %outbc = bitcast <4 x i32> %out to <4 x float> ; view the integer splat as a float vector again
+ %ext = extractelement <4 x float> %outbc, i32 2 ; read back lane 2; every lane holds the same bits as %src
+ ret float %ext
+}
+
+define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) { ; Adds two loaded halves, splats the sum via the integer domain (as in vdup_f16_bc) and returns lane 2 of the splat.
+; CHECK-LABEL: vdup_f16_extract:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vldr.16 s0, [r2]
+; CHECK-NEXT: vldr.16 s2, [r1]
+; CHECK-NEXT: vadd.f16 s0, s2, s0
+; CHECK-NEXT: vstr.16 s0, [sp, #2]
+; CHECK-NEXT: ldrh.w r1, [sp, #2]
+; CHECK-NEXT: vdup.16 q0, r1
+; CHECK-NEXT: vstr.16 s1, [r0]
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = load half, half *%src1, align 2
+ %1 = load half, half *%src2, align 2
+ %2 = fadd half %0, %1
+ %bc = bitcast half %2 to i16 ; reinterpret the sum's bits as i16
+ %3 = insertelement <8 x i16> undef, i16 %bc, i32 0 ; place the scalar in lane 0
+ %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0 to every lane
+ %outbc = bitcast <8 x i16> %out to <8 x half> ; view the integer splat as a half vector again
+ %ext = extractelement <8 x half> %outbc, i32 2 ; read back lane 2; every lane holds the same bits as the sum
+ ret half %ext
+}
More information about the llvm-commits
mailing list