[llvm] f5f83cf - [ARM] VMOVhr load -> vldr
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed May 6 07:46:11 PDT 2020
Author: David Green
Date: 2020-05-06T15:45:56+01:00
New Revision: f5f83cf4df3e75d8b88214928af19b64849c432a
URL: https://github.com/llvm/llvm-project/commit/f5f83cf4df3e75d8b88214928af19b64849c432a
DIFF: https://github.com/llvm/llvm-project/commit/f5f83cf4df3e75d8b88214928af19b64849c432a.diff
LOG: [ARM] VMOVhr load -> vldr
Much like the similar combine added recently for VMOVrh load, this
adds a fold for VMOVhr load, turning it into a vldr.16 as opposed to an
ldrh followed by a vmov.f16.
Differential Revision: https://reviews.llvm.org/D78714
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3da56150d7c8..5e5cf5e076b6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13053,6 +13053,18 @@ static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &
}
}
+ // fold (VMOVhr (load x)) -> (load (f16*)x)
+ if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
+ if (LN0->hasOneUse() && LN0->isUnindexed() &&
+ LN0->getMemoryVT() == MVT::i16) {
+ SDValue Load = DCI.DAG.getLoad(MVT::f16, SDLoc(N), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getMemOperand());
+ DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
+ DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+
// Only the bottom 16 bits of the source register are used.
APInt DemandedMask = APInt::getLowBitsSet(32, 16);
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index fdbede02b518..aefd250ab7bc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -5176,105 +5176,104 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
-; CHECK-MVE-NEXT: ldrh r1, [r0]
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
-; CHECK-MVE-NEXT: vmovx.f16 s14, s8
-; CHECK-MVE-NEXT: movs r2, #0
-; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: movs r0, #0
-; CHECK-MVE-NEXT: vmov.f16 s16, r1
-; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: moveq r1, #1
-; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
-; CHECK-MVE-NEXT: cset r1, ne
-; CHECK-MVE-NEXT: vmovx.f16 s0, s3
-; CHECK-MVE-NEXT: lsls r1, r1, #31
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
-; CHECK-MVE-NEXT: vmov r1, s12
+; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
-; CHECK-MVE-NEXT: vmov.16 q3[1], r1
; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: moveq r1, #1
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: cset r1, ne
-; CHECK-MVE-NEXT: lsls r1, r1, #31
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT: vmov r1, s18
+; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
-; CHECK-MVE-NEXT: vmov.16 q3[2], r1
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: moveq r1, #1
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
-; CHECK-MVE-NEXT: lsls r1, r1, #31
+; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vmov r1, s18
+; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
-; CHECK-MVE-NEXT: vmov.16 q3[3], r1
-; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: moveq r1, #1
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: cset r1, ne
-; CHECK-MVE-NEXT: lsls r1, r1, #31
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT: vmov r1, s18
+; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
-; CHECK-MVE-NEXT: vmov.16 q3[4], r1
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: moveq r1, #1
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
-; CHECK-MVE-NEXT: lsls r1, r1, #31
+; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT: vmov r1, s18
+; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
-; CHECK-MVE-NEXT: vmov.16 q3[5], r1
-; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: moveq r1, #1
-; CHECK-MVE-NEXT: cmp r1, #0
-; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
-; CHECK-MVE-NEXT: lsls r1, r1, #31
+; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: moveq r0, #1
-; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: moveq r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
-; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: lsls r0, r0, #31
-; CHECK-MVE-NEXT: vmov.16 q3[6], r1
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
More information about the llvm-commits
mailing list