[PATCH] D78706: [ARM] Convert a bitcast VDUP to a VDUP

Thu Apr 23 05:21:59 PDT 2020

dmgreen created this revision.
dmgreen added reviewers: samparker, SjoerdMeijer, simon_tatham, ostannard.
Herald added subscribers: danielkiss, hiraditya, kristof.beyls.

The idea, under MVE, is to introduce more bitcasts around VDUP's in an attempt to get the type correct across basic block boundaries. In order to do that without other regressions we need a few fixups, of which this is the first. If the code is a bitcast of a VDUP, we can convert that straight into a VDUP of the new type, so long as they have the same size.


https://reviews.llvm.org/D78706

Files:
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/test/CodeGen/Thumb2/mve-vaddqr.ll


Index: llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
===================================================================

--- llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
+++ llvm/test/CodeGen/Thumb2/mve-vaddqr.ll
@@ -131,8 +131,7 @@
 ; CHECK-LABEL: vaddqr_v4f32_3:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov r0, s4
-; CHECK-NEXT:    vdup.32 q1, r0
-; CHECK-NEXT:    vadd.f32 q0, q0, q1
+; CHECK-NEXT:    vadd.f32 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %src2bc = bitcast float %src2 to i32
@@ -147,8 +146,7 @@
 ; CHECK-LABEL: vaddqr_v8f16_3:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    ldrh r0, [r0]
-; CHECK-NEXT:    vdup.16 q1, r0
-; CHECK-NEXT:    vadd.f16 q0, q0, q1
+; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %src2 = load half, half *%src2p, align 2
@@ -164,8 +162,7 @@
 ; CHECK-LABEL: vaddqr_v4f32_4:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov r0, s4
-; CHECK-NEXT:    vdup.32 q1, r0
-; CHECK-NEXT:    vadd.f32 q0, q1, q0
+; CHECK-NEXT:    vadd.f32 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %src2bc = bitcast float %src2 to i32
@@ -180,8 +177,7 @@
 ; CHECK-LABEL: vaddqr_v8f16_4:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    ldrh r0, [r0]
-; CHECK-NEXT:    vdup.16 q1, r0
-; CHECK-NEXT:    vadd.f16 q0, q1, q0
+; CHECK-NEXT:    vadd.f16 q0, q0, r0
 ; CHECK-NEXT:    bx lr
 entry:
   %src2 = load half, half *%src2p, align 2
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15197,8 +15197,17 @@
   return Res;
 }
 
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+                                    const ARMSubtarget *ST) {
   SDValue Src = N->getOperand(0);
+  EVT DstVT = N->getValueType(0);
+
+  // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
+  if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
+    EVT SrcVT = Src.getValueType();
+    if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
+      return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
+  }
 
   // We may have a bitcast of something that has already had this bitcast
   // combine performed on it, so skip past any VECTOR_REG_CASTs.
@@ -15208,7 +15217,6 @@
   // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
   // would be generated is at least the width of the element type.
   EVT SrcVT = Src.getValueType();
-  EVT DstVT = N->getValueType(0);
   if ((Src.getOpcode() == ARMISD::VMOVIMM ||
        Src.getOpcode() == ARMISD::VMVNIMM ||
        Src.getOpcode() == ARMISD::VMOVFPIMM) &&
@@ -15273,7 +15281,7 @@
   case ARMISD::BUILD_VECTOR:
     return PerformARMBUILD_VECTORCombine(N, DCI);
   case ISD::BITCAST:
-    return PerformBITCASTCombine(N, DCI.DAG);
+    return PerformBITCASTCombine(N, DCI.DAG, Subtarget);
   case ARMISD::PREDICATE_CAST:
     return PerformPREDICATE_CASTCombine(N, DCI);
   case ARMISD::VECTOR_REG_CAST:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D78706.259528.patch
Type: text/x-patch
Size: 3128 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200423/4e16f3b8/attachment-0001.bin>