[llvm] f5f83cf - [ARM] VMOVhr load -> vldr

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed May 6 07:46:11 PDT 2020


Author: David Green
Date: 2020-05-06T15:45:56+01:00
New Revision: f5f83cf4df3e75d8b88214928af19b64849c432a

URL: https://github.com/llvm/llvm-project/commit/f5f83cf4df3e75d8b88214928af19b64849c432a
DIFF: https://github.com/llvm/llvm-project/commit/f5f83cf4df3e75d8b88214928af19b64849c432a.diff

LOG: [ARM] VMOVhr load -> vldr

Much like the similar combine added recently for VMOVrh load, this
adds a fold for VMOVhr load turning it into a vldr.f16 as opposed to a
vldrh and vmov.f16.

Differential Revision: https://reviews.llvm.org/D78714

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3da56150d7c8..5e5cf5e076b6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13053,6 +13053,18 @@ static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &
     }
   }
 
+  // fold (VMOVhr (load x)) -> (load (f16*)x)
+  if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
+    if (LN0->hasOneUse() && LN0->isUnindexed() &&
+        LN0->getMemoryVT() == MVT::i16) {
+      SDValue Load = DCI.DAG.getLoad(MVT::f16, SDLoc(N), LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getMemOperand());
+      DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
+      DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
+      return Load;
+    }
+  }
+
   // Only the bottom 16 bits of the source register are used.
   APInt DemandedMask = APInt::getLowBitsSet(32, 16);
   const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index fdbede02b518..aefd250ab7bc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -5176,105 +5176,104 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
 ; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
-; CHECK-MVE-NEXT:    ldrh r1, [r0]
+; CHECK-MVE-NEXT:    vldr.16 s16, [r0]
 ; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
 ; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmov.f16 s16, r1
-; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
 ; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
 ; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r1, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    movs r2, #0
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
 ; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r1, s12
+; CHECK-MVE-NEXT:    vmov r0, s12
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
 ; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
 ; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vmov r2, s12
+; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
 ; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r1
 ; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    cset r1, ne
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
-; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vmov r0, s18
 ; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
 ; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r1
+; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
 ; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
 ; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vmov r0, s18
 ; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r1
-; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    cset r1, ne
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
-; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vmov r0, s18
 ; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
 ; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r1
+; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
 ; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
 ; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vmov r0, s18
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r1
-; CHECK-MVE-NEXT:    mov.w r1, #0
+; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
+; CHECK-MVE-NEXT:    mov.w r0, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
-; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
 ; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
 ; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    vmov r0, s18
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
 ; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmov r1, s18
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
 ; CHECK-MVE-NEXT:    vmov.16 q3[7], r0


        


More information about the llvm-commits mailing list