[llvm] 6457f42 - [DAGCombiner] Extend ISD::ABDS/U combine to handle more cases.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 17 05:34:07 PST 2022
Author: Paul Walker
Date: 2022-02-17T13:32:20Z
New Revision: 6457f42bde82fd9a514434c946b9d3fbe92a8619
URL: https://github.com/llvm/llvm-project/commit/6457f42bde82fd9a514434c946b9d3fbe92a8619
DIFF: https://github.com/llvm/llvm-project/commit/6457f42bde82fd9a514434c946b9d3fbe92a8619.diff
LOG: [DAGCombiner] Extend ISD::ABDS/U combine to handle more cases.
The current ABD combine doesn't quite work for SVE because only a
single scalable vector per scalar integer type is legal (e.g. for
i32, <vscale x 4 x i32> is the only legal scalable vector type).
This patch extends the combine to also trigger for cases where the
operand extensions must be retained.
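As a minimal sketch (mirroring the sabd_h_promoted_ops test updated
below), the new fold fires when the absolute difference is only legal
at the extended type; the extends are kept, but the sub+abs pair still
collapses into a single ISD::ABDS node:

  %a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
  %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %sub = sub <vscale x 8 x i16> %a.sext, %b.sext
  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
  ; the sxtb extends of the inputs remain, but sub+abs now selects to:
  ;   sabd z0.h, p0/m, z0.h, z1.h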
Differential Revision: https://reviews.llvm.org/D115739
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/neon-abd.ll
llvm/test/CodeGen/AArch64/sve-abd.ll
llvm/test/CodeGen/Thumb2/mve-vabdus.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9b156b2c4940..a0708336d26a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9505,18 +9505,27 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
return SDValue();
+ EVT VT = N->getValueType(0);
EVT VT1 = Op0.getOperand(0).getValueType();
EVT VT2 = Op1.getOperand(0).getValueType();
- // Check if the operands are of same type and valid size.
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
- if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
- return SDValue();
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
- SDValue ABD =
- DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
+ // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
+ // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
+ // NOTE: Extensions must be equivalent.
+ if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+ SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
+ }
+
+ // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
+ // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
+ if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
+ return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
+
+ return SDValue();
}
SDValue DAGCombiner::visitABS(SDNode *N) {
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 94c25e945f63..0279c832391e 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -53,8 +53,7 @@ define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: sshr v1.4h, v1.4h, #8
-; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: abs v0.4h, v0.4h
+; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%a.sext = sext <4 x i8> %a to <4 x i16>
%b.sext = sext <4 x i8> %b to <4 x i16>
@@ -108,8 +107,7 @@ define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
-; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: abs v0.2s, v0.2s
+; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%a.sext = sext <2 x i16> %a to <2 x i32>
%b.sext = sext <2 x i16> %b to <2 x i32>
@@ -234,8 +232,7 @@ define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: abs v0.4h, v0.4h
+; CHECK-NEXT: uabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%a.zext = zext <4 x i8> %a to <4 x i16>
%b.zext = zext <4 x i8> %b to <4 x i16>
@@ -288,8 +285,7 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: abs v0.2s, v0.2s
+; CHECK-NEXT: uabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%a.zext = zext <2 x i16> %a to <2 x i32>
%b.zext = zext <2 x i16> %b to <2 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
index affd6d5b15f7..1bdff3a42db9 100644
--- a/llvm/test/CodeGen/AArch64/sve-abd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -24,11 +24,10 @@ define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: sabd_b_promoted_ops:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: sub z0.b, z0.b, z1.b
-; CHECK-NEXT: abs z0.b, p2/m, z0.b
+; CHECK-NEXT: sabd z0.b, p2/m, z0.b, z1.b
; CHECK-NEXT: ret
%a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
%b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@@ -57,8 +56,7 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT: sub z0.h, z0.h, z1.h
-; CHECK-NEXT: abs z0.h, p0/m, z0.h
+; CHECK-NEXT: sabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -87,8 +85,7 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxth z0.s, p0/m, z0.s
; CHECK-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -117,8 +114,7 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
-; CHECK-NEXT: sub z0.d, z0.d, z1.d
-; CHECK-NEXT: abs z0.d, p0/m, z0.d
+; CHECK-NEXT: sabd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -148,11 +144,10 @@ define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: uabd_b_promoted_ops:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
-; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: add z0.b, z0.b, z1.b
-; CHECK-NEXT: abs z0.b, p2/m, z0.b
+; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1
+; CHECK-NEXT: uabd z0.b, p2/m, z0.b, z1.b
; CHECK-NEXT: ret
%a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
%b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@@ -178,11 +173,10 @@ define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) #0 {
; CHECK-LABEL: uabd_h_promoted_ops:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sub z0.h, z0.h, z1.h
-; CHECK-NEXT: abs z0.h, p0/m, z0.h
+; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -208,11 +202,10 @@ define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) #0 {
; CHECK-LABEL: uabd_s_promoted_ops:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -238,11 +231,10 @@ define <vscale x 2 x i64> @uabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) #0 {
; CHECK-LABEL: uabd_d_promoted_ops:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sub z0.d, z0.d, z1.d
-; CHECK-NEXT: abs z0.d, p0/m, z0.d
+; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -251,6 +243,66 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
ret <vscale x 2 x i64> %abs
}
+; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and
+; %b have differing types.
+define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <vscale x 4 x i8> %b) #0 {
+; CHECK-LABEL: uabd_non_matching_extension:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z1.s, z1.s, #0xff
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: uunpkhi z3.d, z1.s
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sub z0.d, z0.d, z1.d
+; CHECK-NEXT: sub z1.d, z2.d, z3.d
+; CHECK-NEXT: abs z1.d, p0/m, z1.d
+; CHECK-NEXT: abs z0.d, p0/m, z0.d
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
+ %b.zext = zext <vscale x 4 x i8> %b to <vscale x 4 x i64>
+ %sub = sub <vscale x 4 x i64> %a.zext, %b.zext
+ %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
+ %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %trunc
+}
+
+; Test the situation where isLegal(ISD::ABD, typeof(%a.zext)) returns true but
+; %a and %b have differing types.
+define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a, <vscale x 4 x i16> %b) #0 {
+; CHECK-LABEL: uabd_non_matching_promoted_ops:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
+ %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+ %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
+ %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+ ret <vscale x 4 x i32> %abs
+}
+
+; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and
+; %b are promoted differently.
+define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) #0 {
+; CHECK-LABEL: uabd_non_matching_promotion:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT: sub z0.s, z0.s, z1.s
+; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
+ %b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
+ %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
+ %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+ ret <vscale x 4 x i32> %abs
+}
+
declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
index aa37c70718ac..5d93e5f179db 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
@@ -21,8 +21,7 @@ define arm_aapcs_vfpcc <8 x i8> @vabd_v8s8(<8 x i8> %src1, <8 x i8> %src2) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovlb.s8 q1, q1
; CHECK-NEXT: vmovlb.s8 q0, q0
-; CHECK-NEXT: vsub.i16 q0, q0, q1
-; CHECK-NEXT: vabs.s16 q0, q0
+; CHECK-NEXT: vabd.s16 q0, q0, q1
; CHECK-NEXT: bx lr
%sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
%sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
@@ -74,8 +73,7 @@ define arm_aapcs_vfpcc <4 x i16> @vabd_v4s16(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovlb.s16 q1, q1
; CHECK-NEXT: vmovlb.s16 q0, q0
-; CHECK-NEXT: vsub.i32 q0, q0, q1
-; CHECK-NEXT: vabs.s32 q0, q0
+; CHECK-NEXT: vabd.s32 q0, q0, q1
; CHECK-NEXT: bx lr
%sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
@@ -158,8 +156,7 @@ define arm_aapcs_vfpcc <8 x i8> @vabd_v8u8(<8 x i8> %src1, <8 x i8> %src2) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovlb.u8 q1, q1
; CHECK-NEXT: vmovlb.u8 q0, q0
-; CHECK-NEXT: vsub.i16 q0, q0, q1
-; CHECK-NEXT: vabs.s16 q0, q0
+; CHECK-NEXT: vabd.u16 q0, q0, q1
; CHECK-NEXT: bx lr
%zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
%zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
@@ -210,8 +207,7 @@ define arm_aapcs_vfpcc <4 x i16> @vabd_v4u16(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovlb.u16 q1, q1
; CHECK-NEXT: vmovlb.u16 q0, q0
-; CHECK-NEXT: vsub.i32 q0, q0, q1
-; CHECK-NEXT: vabs.s32 q0, q0
+; CHECK-NEXT: vabd.u32 q0, q0, q1
; CHECK-NEXT: bx lr
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>