[llvm] b0045f5 - [ARM] Fix a bug in finding a pair of extracts to create VMOVRRD

Wed Oct 6 10:03:46 PDT 2021

Author: Pengxuan Zheng
Date: 2021-10-06T10:03:32-07:00
New Revision: b0045f559549e993629a7d541b3b1df3f8144814

URL: https://github.com/llvm/llvm-project/commit/b0045f559549e993629a7d541b3b1df3f8144814
DIFF: https://github.com/llvm/llvm-project/commit/b0045f559549e993629a7d541b3b1df3f8144814.diff

LOG: [ARM] Fix a bug in finding a pair of extracts to create VMOVRRD

D100244 missed a check on the ResNo of the extract's operand 0 when finding a
pair of extracts to combine into a VMOVRRD (extract(x, n); extract(x, n+1) ->
VMOVRRD(extract x, n/2)). As a result, it could incorrectly pair an
extract(x, n) with, for example, an extract(x:3, n+1), which reads a different
result of the same node. This patch fixes the bug by also requiring the ResNo
of operand 0 to match between the two extracts.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D111188

Added: 
    llvm/test/CodeGen/ARM/vector-extract.ll

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f181106df3a4..03cbe48dca2f 100644

--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14968,6 +14968,7 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
 
   SDValue Op0 = Ext.getOperand(0);
   EVT VecVT = Op0.getValueType();
+  unsigned ResNo = Op0.getResNo();
   unsigned Lane = Ext.getConstantOperandVal(1);
   if (VecVT.getVectorNumElements() != 4)
     return SDValue();
@@ -14976,7 +14977,8 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
     return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
            isa<ConstantSDNode>(V->getOperand(1)) &&
-           V->getConstantOperandVal(1) == Lane + 1;
+           V->getConstantOperandVal(1) == Lane + 1 &&
+           V->getOperand(0).getResNo() == ResNo;
   });
   if (OtherIt == Op0->uses().end())
     return SDValue();

diff  --git a/llvm/test/CodeGen/ARM/vector-extract.ll b/llvm/test/CodeGen/ARM/vector-extract.ll
new file mode 100644
index 000000000000..59eaf3a70b21
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vector-extract.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv7a-none-eabi -mattr=+neon %s -o - | FileCheck %s
+
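+; Check that the two extracts, which read different results of the same vld4 (result 0 lane 0 and result 1 lane 1), are not combined into a single two-register vmov (VMOVRRD).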
+
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+
+define i32 @vld4Qi32(i32* %A) nounwind {
+; CHECK-LABEL: vld4Qi32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld4.32 {d16, d18, d20, d22}, [r0]!
+; CHECK-NEXT:    vld4.32 {d17, d19, d21, d23}, [r0]
+; CHECK-NEXT:    vmov.32 r0, d18[1]
+; CHECK-NEXT:    vmov.32 r1, d16[0]
+; CHECK-NEXT:    add r0, r1, r0
+; CHECK-NEXT:    bx lr
+        %tmp0 = bitcast i32* %A to i8*
+        %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32.p0i8(i8* %tmp0, i32 1)
+        %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
+        %tmp3 = extractelement <4 x i32> %tmp2, i32 0
+        %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 1
+        %tmp5 = extractelement <4 x i32> %tmp4, i32 1
+        %tmp6 = add i32 %tmp3, %tmp5
+        ret i32 %tmp6
+}
+
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32.p0i8(i8*, i32) nounwind readonly