[llvm] 11e415d - [ARM] Make v2f64 scalar_to_vector legal
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 8 03:35:14 PST 2021
Author: David Green
Date: 2021-02-08T11:34:55Z
New Revision: 11e415dc908ef66d656366e6b3fe854f68b6e38d
URL: https://github.com/llvm/llvm-project/commit/11e415dc908ef66d656366e6b3fe854f68b6e38d
DIFF: https://github.com/llvm/llvm-project/commit/11e415dc908ef66d656366e6b3fe854f68b6e38d.diff
LOG: [ARM] Make v2f64 scalar_to_vector legal
Because we mark all operations as Expand for v2f64, scalar_to_vector
would end up being lowered through a stack store/reload. But it is
pretty simple to implement directly, requiring only the insertion of a
D register into an undef vector. This helps clean up some inefficient
codegen arising from soft-float calling conventions.
Differential Revision: https://reviews.llvm.org/D96153
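
For context, a minimal IR reproducer (taken from the insert_f64 test
updated below; the llc flags are an assumption modelled on the test
file's RUN lines, e.g. llc -mtriple=thumbv8.1m.main-none-none-eabi
-mattr=+mve.fp,+fp64):

; Inserting a double into lane 0 of an undef <2 x double> builds an
; ISD::SCALAR_TO_VECTOR node. Previously this expanded through a stack
; store/reload; with this patch it selects to a subregister insert of
; the D register into an undef Q register.
define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
entry:
  %res = insertelement <2 x double> undef, double %a, i32 0
  ret <2 x double> %res
}

The new codegen for this function is just a KILL marking the D register
as defining its containing Q register, as seen in the updated CHECK
lines in mve-shuffle.ll below.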
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-extractstore.ll
llvm/test/CodeGen/Thumb2/mve-shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4fdb5211ec2d..3c221d2db98f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -396,6 +396,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+
// We can do bitwise operations on v2i64 vectors
setOperationAction(ISD::AND, MVT::v2i64, Legal);
setOperationAction(ISD::OR, MVT::v2i64, Legal);
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 8b0dadd4af24..c15031d7e1a5 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1909,6 +1909,8 @@ let Predicates = [HasMVEInt] in {
(VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))),
HPR)>;
+ def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
diff --git a/llvm/test/CodeGen/Thumb2/mve-extractstore.ll b/llvm/test/CodeGen/Thumb2/mve-extractstore.ll
index a28abe9d87eb..dd26ad181c90 100644
--- a/llvm/test/CodeGen/Thumb2/mve-extractstore.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-extractstore.ll
@@ -4,28 +4,15 @@
define half @extret1_f16_sf(<8 x half> %a, <8 x half> %b, half* nocapture %p) {
; CHECK-LABEL: extret1_f16_sf:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: .setfp r7, sp, #8
-; CHECK-NEXT: add r7, sp, #8
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r4, sp
-; CHECK-NEXT: bfc r4, #0, #4
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: strd r0, r1, [sp]
-; CHECK-NEXT: add.w r0, r7, #8
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: sub.w r4, r7, #8
-; CHECK-NEXT: ldr r0, [r7, #24]
-; CHECK-NEXT: vadd.f16 q0, q1, q0
+; CHECK-NEXT: vmov d0, r0, r1
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: ldr r0, [sp, #16]
+; CHECK-NEXT: vadd.f16 q0, q0, q1
; CHECK-NEXT: vmovx.f16 s0, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: vmov r0, s0
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop {r4, r6, r7, pc}
+; CHECK-NEXT: bx lr
%c = fadd <8 x half> %a, %b
%e = extractelement <8 x half> %c, i32 1
store half %e, half* %p, align 2
@@ -112,27 +99,14 @@ define arm_aapcs_vfpcc <8 x half> @extret4_v8f16_hf(<8 x half> %a, <8 x half> %b
define float @extret1_f32_sf(<4 x float> %a, <4 x float> %b, float* nocapture %p) {
; CHECK-LABEL: extret1_f32_sf:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: .setfp r7, sp, #8
-; CHECK-NEXT: add r7, sp, #8
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r4, sp
-; CHECK-NEXT: bfc r4, #0, #4
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: strd r0, r1, [sp]
-; CHECK-NEXT: add.w r0, r7, #8
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: ldr r1, [r7, #24]
-; CHECK-NEXT: sub.w r4, r7, #8
-; CHECK-NEXT: vadd.f32 q0, q1, q0
+; CHECK-NEXT: vmov d0, r0, r1
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: ldr r1, [sp, #16]
+; CHECK-NEXT: vadd.f32 q0, q0, q1
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vstr s1, [r1]
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop {r4, r6, r7, pc}
+; CHECK-NEXT: bx lr
%c = fadd <4 x float> %a, %b
%e = extractelement <4 x float> %c, i32 1
store float %e, float* %p, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index 8e98ab2425a8..10fae13908ee 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -1669,21 +1669,8 @@ entry:
define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
; CHECK-LABEL: insert_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: .setfp r7, sp, #8
-; CHECK-NEXT: add r7, sp, #8
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r4, sp
-; CHECK-NEXT: bfc r4, #0, #4
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: sub.w r4, r7, #8
-; CHECK-NEXT: vstr d0, [sp]
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop {r4, r6, r7, pc}
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: bx lr
entry:
%res = insertelement <2 x double> undef, double %a, i32 0
ret <2 x double> %res