[llvm] a349949 - [ARM] Extract from a VDUP

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed May 6 06:52:11 PDT 2020


Author: David Green
Date: 2020-05-06T14:51:25+01:00
New Revision: a349949f8ab15c50750880001b4c132aef29da25

URL: https://github.com/llvm/llvm-project/commit/a349949f8ab15c50750880001b4c132aef29da25
DIFF: https://github.com/llvm/llvm-project/commit/a349949f8ab15c50750880001b4c132aef29da25.diff

LOG: [ARM] Extract from a VDUP

If we get into the situation where we are extracting from a VDUP, the
extracted value is just the origin, so long as the types match or we can
bitcast between the two.

Differential Revision: https://reviews.llvm.org/D78708

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
    llvm/test/CodeGen/Thumb2/mve-vdup.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bb39f7ff34c1..a1ca6d9b3af9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -934,6 +934,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::BUILD_VECTOR);
     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+    setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
     setTargetDAGCombine(ISD::STORE);
     setTargetDAGCombine(ISD::SIGN_EXTEND);
     setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -13313,6 +13314,29 @@ static SDValue PerformInsertEltCombine(SDNode *N,
   return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
 }
 
+static SDValue PerformExtractEltCombine(SDNode *N,
+                                        TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue Op0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  SDLoc dl(N);
+
+  // extract (vdup x) -> x
+  if (Op0->getOpcode() == ARMISD::VDUP) {
+    SDValue X = Op0->getOperand(0);
+    if (VT == MVT::f16 && X.getValueType() == MVT::i32)
+      return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
+    if (VT == MVT::i32 && X.getValueType() == MVT::f16)
+      return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
+
+    while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
+      X = X->getOperand(0);
+    if (X.getValueType() == VT)
+      return X;
+  }
+
+  return SDValue();
+}
+
 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
 /// ISD::VECTOR_SHUFFLE.
 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
@@ -15301,6 +15325,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::STORE:      return PerformSTORECombine(N, DCI, Subtarget);
   case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+  case ISD::EXTRACT_VECTOR_ELT: return PerformExtractEltCombine(N, DCI);
   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index 9093a09a119e..fdbede02b518 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -5174,116 +5174,112 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
 ; CHECK-MVE-LABEL: vcmp_oeq_v8f16_bc:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    .vsave {d8, d9, d10, d11}
-; CHECK-MVE-NEXT:    vpush {d8, d9, d10, d11}
-; CHECK-MVE-NEXT:    ldrh r0, [r0]
+; CHECK-MVE-NEXT:    .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT:    vpush {d8, d9, d10}
+; CHECK-MVE-NEXT:    ldrh r1, [r0]
 ; CHECK-MVE-NEXT:    vmovx.f16 s12, s0
+; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
 ; CHECK-MVE-NEXT:    movs r2, #0
-; CHECK-MVE-NEXT:    movs r1, #0
-; CHECK-MVE-NEXT:    vdup.16 q4, r0
+; CHECK-MVE-NEXT:    vmovx.f16 s20, s9
 ; CHECK-MVE-NEXT:    movs r0, #0
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s16
-; CHECK-MVE-NEXT:    vmovx.f16 s22, s17
-; CHECK-MVE-NEXT:    vcmp.f16 s12, s14
+; CHECK-MVE-NEXT:    vmov.f16 s16, r1
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vcmp.f16 s12, s16
 ; CHECK-MVE-NEXT:    vmovx.f16 s12, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
 ; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s14, s8
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    cset r1, ne
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
 ; CHECK-MVE-NEXT:    vseleq.f16 s12, s14, s12
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r2, #1
 ; CHECK-MVE-NEXT:    cmp r2, #0
 ; CHECK-MVE-NEXT:    cset r2, ne
-; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov r1, s12
 ; CHECK-MVE-NEXT:    lsls r2, r2, #31
-; CHECK-MVE-NEXT:    vcmp.f16 s1, s17
+; CHECK-MVE-NEXT:    vcmp.f16 s1, s16
 ; CHECK-MVE-NEXT:    vseleq.f16 s12, s8, s4
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-MVE-NEXT:    vmov r2, s12
 ; CHECK-MVE-NEXT:    vmov.16 q3[0], r2
-; CHECK-MVE-NEXT:    vmov.16 q3[1], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
-; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s20, s9, s5
-; CHECK-MVE-NEXT:    vmov r0, s20
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s1
-; CHECK-MVE-NEXT:    vcmp.f16 s20, s22
-; CHECK-MVE-NEXT:    vmov.16 q3[2], r0
-; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    vmov.16 q3[1], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s5
-; CHECK-MVE-NEXT:    vmovx.f16 s22, s9
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s20, s22, s20
-; CHECK-MVE-NEXT:    vcmp.f16 s2, s18
-; CHECK-MVE-NEXT:    vmov r0, s20
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vmovx.f16 s18, s1
+; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
+; CHECK-MVE-NEXT:    vmov.16 q3[2], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov.16 q3[3], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s22, s18
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s20, s10, s6
-; CHECK-MVE-NEXT:    vmov r0, s20
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s2
-; CHECK-MVE-NEXT:    vcmp.f16 s20, s22
-; CHECK-MVE-NEXT:    vmov.16 q3[4], r0
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s18, s5
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s2, s16
+; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vmovx.f16 s20, s10
+; CHECK-MVE-NEXT:    vmov.16 q3[3], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s20, s6
-; CHECK-MVE-NEXT:    vmovx.f16 s22, s10
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s20, s22, s20
-; CHECK-MVE-NEXT:    vcmp.f16 s3, s19
-; CHECK-MVE-NEXT:    vmov r0, s20
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vmovx.f16 s18, s2
+; CHECK-MVE-NEXT:    vcmp.f16 s18, s16
+; CHECK-MVE-NEXT:    vmov.16 q3[4], r1
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-MVE-NEXT:    vmov.16 q3[5], r0
-; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
-; CHECK-MVE-NEXT:    moveq r0, #1
-; CHECK-MVE-NEXT:    cmp r0, #0
-; CHECK-MVE-NEXT:    cset r0, ne
-; CHECK-MVE-NEXT:    vmovx.f16 s2, s19
-; CHECK-MVE-NEXT:    vcmp.f16 s0, s2
-; CHECK-MVE-NEXT:    lsls r0, r0, #31
-; CHECK-MVE-NEXT:    vseleq.f16 s20, s11, s7
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmovx.f16 s18, s6
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
+; CHECK-MVE-NEXT:    vcmp.f16 s3, s16
+; CHECK-MVE-NEXT:    vseleq.f16 s18, s20, s18
 ; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vmov r1, s18
+; CHECK-MVE-NEXT:    vcmp.f16 s0, s16
+; CHECK-MVE-NEXT:    vmov.16 q3[5], r1
+; CHECK-MVE-NEXT:    mov.w r1, #0
 ; CHECK-MVE-NEXT:    it eq
 ; CHECK-MVE-NEXT:    moveq r1, #1
-; CHECK-MVE-NEXT:    vmov r0, s20
 ; CHECK-MVE-NEXT:    cmp r1, #0
-; CHECK-MVE-NEXT:    vmov.16 q3[6], r0
-; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    cset r1, ne
 ; CHECK-MVE-NEXT:    vmovx.f16 s0, s7
+; CHECK-MVE-NEXT:    lsls r1, r1, #31
 ; CHECK-MVE-NEXT:    vmovx.f16 s2, s11
+; CHECK-MVE-NEXT:    vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    vmov r1, s18
 ; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vmov.16 q3[6], r1
 ; CHECK-MVE-NEXT:    vseleq.f16 s0, s2, s0
 ; CHECK-MVE-NEXT:    vmov r0, s0
 ; CHECK-MVE-NEXT:    vmov.16 q3[7], r0
 ; CHECK-MVE-NEXT:    vmov q0, q3
-; CHECK-MVE-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-MVE-NEXT:    vpop {d8, d9, d10}
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16_bc:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
index 78bd610958d7..de680e6fa3e1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -240,9 +240,6 @@ entry:
 define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
 ; CHECK-LABEL: vdup_f32_extract:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    vdup.32 q0, r0
-; CHECK-NEXT:    vmov.f32 s0, s2
 ; CHECK-NEXT:    bx lr
 entry:
   %srcbc = bitcast float %src to i32
@@ -260,8 +257,8 @@ define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) {
 ; CHECK-NEXT:    vldr.16 s2, [r1]
 ; CHECK-NEXT:    vadd.f16 s0, s2, s0
 ; CHECK-NEXT:    vmov.f16 r1, s0
-; CHECK-NEXT:    vdup.16 q0, r1
-; CHECK-NEXT:    vstr.16 s1, [r0]
+; CHECK-NEXT:    vmov.f16 s0, r1
+; CHECK-NEXT:    vstr.16 s0, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = load half, half *%src1, align 2


        


More information about the llvm-commits mailing list