[llvm] ae9a346 - [ARM] Fix DAG combine loop in reduction distribution
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 12 08:37:48 PDT 2021
Author: David Green
Date: 2021-08-12T16:37:39+01:00
New Revision: ae9a346ef8620851b54682b4b6738788af709578
URL: https://github.com/llvm/llvm-project/commit/ae9a346ef8620851b54682b4b6738788af709578
DIFF: https://github.com/llvm/llvm-project/commit/ae9a346ef8620851b54682b4b6738788af709578.diff
LOG: [ARM] Fix DAG combine loop in reduction distribution
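Given a constant operand, the MVE-specific vecreduce distribution combine and
the generic DAGCombine could fight, each redistributing the adds in the
opposite order, so the combiner looped without making progress. Add a guard to
the MVE vecreduce distribution so that it does not fire when the
non-reduction operand is a constant, which prevents that.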
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 79cf91f114ba9..9e7f40301d92e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13089,7 +13089,8 @@ static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
// add(add(X, vecreduce(Y)), vecreduce(Z))
// to make better use of vaddva style instructions.
if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
- IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1))) {
+ IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
+ !isa<ConstantSDNode>(N0)) {
SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
index 44b8005d495e0..465457bee0128 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
@@ -3772,6 +3772,53 @@ entry:
ret i8 %52
}
+
+define arm_aapcs_vfpcc i32 @add_two_const(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_two_const:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vaddv.u32 r0, q1
+; CHECK-NEXT: vaddva.u32 r0, q0
+; CHECK-NEXT: adds r0, #10
+; CHECK-NEXT: bx lr
+entry:
+ %a = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+ %b = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
+ %c = add i32 %a, %b
+ %d = add i32 %c, 10
+ ret i32 %d
+}
+
+define arm_aapcs_vfpcc i32 @add_two_const2(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_two_const2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vaddv.u32 r0, q1
+; CHECK-NEXT: vaddva.u32 r0, q0
+; CHECK-NEXT: adds r0, #10
+; CHECK-NEXT: bx lr
+entry:
+ %a = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+ %b = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
+ %c = add i32 %a, 10
+ %d = add i32 %c, %b
+ ret i32 %d
+}
+
+define arm_aapcs_vfpcc i32 @add_two_const3(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_two_const3:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vaddv.u32 r0, q0
+; CHECK-NEXT: vaddva.u32 r0, q1
+; CHECK-NEXT: adds r0, #20
+; CHECK-NEXT: bx lr
+entry:
+ %a = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+ %b = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
+ %c = add i32 %a, 10
+ %d = add i32 %b, 10
+ %e = add i32 %c, %d
+ ret i32 %e
+}
+
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
More information about the llvm-commits mailing list