[llvm] [DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU (PR #85031)
Shourya Goel via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 14 09:51:53 PDT 2024
https://github.com/Sh0g0-1758 updated https://github.com/llvm/llvm-project/pull/85031
>From 070c44d559e4b714e045b3172bff32f8d87c7011 Mon Sep 17 00:00:00 2001
From: Sh0g0-1758 <shouryagoel10000 at gmail.com>
Date: Wed, 13 Mar 2024 11:25:00 +0530
Subject: [PATCH 1/3] Matched Fixed width Pattern for ISD::AVGCEILU
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 +++++++++++++++++++
llvm/test/CodeGen/AArch64/sub_combine.ll | 34 +++++++++++++++
2 files changed, 75 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/sub_combine.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 735cec8ecc0627..935c79472d4d35 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2529,6 +2529,43 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
+// Attempt to form avgceilu(A, B) from sub(or(A, B), lshr(xor(A, B), 1))
+static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::SUB and "SUB node is required here");
+ SDValue Or = N->getOperand(0);
+ SDValue Lshr = N->getOperand(1);
+ if (Or.getOpcode() != ISD::OR or Lshr.getOpcode() != ISD::SRL)
+ return SDValue();
+ SDValue Xor = Lshr.getOperand(0);
+ if (Xor.getOpcode() != ISD::XOR)
+ return SDValue();
+ SDValue Or1 = Or.getOperand(0);
+ SDValue Or2 = Or.getOperand(1);
+ SDValue Xor1 = Xor.getOperand(0);
+ SDValue Xor2 = Xor.getOperand(1);
+ if (Or1 == Xor2 and Or2 == Xor1) {
+ SDValue temp = Or1;
+ Or1 = Or2;
+ Or2 = temp;
+ } else if (Or1 != Xor1 or Or2 != Xor2)
+ return SDValue();
+ // Is the right shift using an immediate value of 1?
+ ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
+ if (!N1C or N1C->getAPIntValue() != 1)
+ return SDValue();
+ EVT VT = Or1.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (VT.isVector())
+ VT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
+ else
+ VT = NVT;
+ SDLoc DL(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT))
+ return SDValue();
+ return DAG.getNode(ISD::AVGCEILU, DL, VT, Or1, Or2);
+}
+
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
@@ -3859,6 +3896,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ // Try to match AVGCEILU fixedwidth pattern
+ if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
+ return V;
+
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
diff --git a/llvm/test/CodeGen/AArch64/sub_combine.ll b/llvm/test/CodeGen/AArch64/sub_combine.ll
new file mode 100644
index 00000000000000..f9df436a3db2a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sub_combine.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debugify-and-strip-all-safe -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+
+define i4 @sub_fixedwidth_i4(i4 %a0, i4 %a1) {
+; CHECK-LABEL: sub_fixedwidth_i4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor w8, w0, w1
+; CHECK-NEXT: orr w9, w0, w1
+; CHECK-NEXT: and w8, w8, #0xe
+; CHECK-NEXT: sub w0, w9, w8, lsr #1
+; CHECK-NEXT: ret
+ %or = or i4 %a0, %a1
+ %xor = xor i4 %a0, %a1
+ %srl = lshr i4 %xor, 1
+ %res = sub i4 %or, %srl
+ ret i4 %res
+}
+
+define <4 x i32> @sub_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: sub_fixedwidth_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ushr v1.4s, v2.4s, #1
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %or = or <4 x i32> %a0, %a1
+ %xor = xor <4 x i32> %a0, %a1
+ %srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
+ %res = sub <4 x i32> %or, %srl
+ ret <4 x i32> %res
+}
+
+
>From 37cda7f3c1f7ce2e5eca9d7c7b7f25ddfa23c3ae Mon Sep 17 00:00:00 2001
From: Shourya Goel <shouryagoel10000 at gmail.com>
Date: Wed, 13 Mar 2024 16:24:08 +0530
Subject: [PATCH 2/3] Replaced and with && and similar changes.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 935c79472d4d35..5ce64dfef9b6db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2531,10 +2531,10 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
// Attempt to form avgceilu(A, B) from sub(or(A, B), lshr(xor(A, B), 1))
static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::SUB and "SUB node is required here");
+ assert(N->getOpcode() == ISD::SUB && "SUB node is required here");
SDValue Or = N->getOperand(0);
SDValue Lshr = N->getOperand(1);
- if (Or.getOpcode() != ISD::OR or Lshr.getOpcode() != ISD::SRL)
+ if (Or.getOpcode() != ISD::OR || Lshr.getOpcode() != ISD::SRL)
return SDValue();
SDValue Xor = Lshr.getOperand(0);
if (Xor.getOpcode() != ISD::XOR)
@@ -2543,11 +2543,11 @@ static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
SDValue Or2 = Or.getOperand(1);
SDValue Xor1 = Xor.getOperand(0);
SDValue Xor2 = Xor.getOperand(1);
- if (Or1 == Xor2 and Or2 == Xor1) {
+ if (Or1 == Xor2 && Or2 == Xor1) {
SDValue temp = Or1;
Or1 = Or2;
Or2 = temp;
- } else if (Or1 != Xor1 or Or2 != Xor2)
+ } else if (Or1 != Xor1 || Or2 != Xor2)
return SDValue();
// Is the right shift using an immediate value of 1?
ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
>From e59331c20e43f44557e14ab04396587e1f3ec054 Mon Sep 17 00:00:00 2001
From: Sh0g0-1758 <shouryagoel10000 at gmail.com>
Date: Thu, 14 Mar 2024 22:21:32 +0530
Subject: [PATCH 3/3] Changed VT and moved tests
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ---
llvm/test/CodeGen/AArch64/hadd-combine.ll | 14 ++++++--
llvm/test/CodeGen/AArch64/sub_combine.ll | 34 -------------------
3 files changed, 11 insertions(+), 42 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/sub_combine.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5ce64dfef9b6db..f7f1420934b0e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2554,11 +2554,6 @@ static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
if (!N1C or N1C->getAPIntValue() != 1)
return SDValue();
EVT VT = Or1.getValueType();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- if (VT.isVector())
- VT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
- else
- VT = NVT;
SDLoc DL(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT))
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index 2269d75cdbb9ed..e58649957025fe 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -329,9 +329,17 @@ define <8 x i16> @hadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
ret <8 x i16> %result
}
-
-
-
+define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: sub_fixedwidth_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %or = or <8 x i16> %a0, %a1
+ %xor = xor <8 x i16> %a0, %a1
+ %srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %res = sub <8 x i16> %or, %srl
+ ret <8 x i16> %res
+}
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_base:
diff --git a/llvm/test/CodeGen/AArch64/sub_combine.ll b/llvm/test/CodeGen/AArch64/sub_combine.ll
deleted file mode 100644
index f9df436a3db2a2..00000000000000
--- a/llvm/test/CodeGen/AArch64/sub_combine.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -debugify-and-strip-all-safe -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
-
-define i4 @sub_fixedwidth_i4(i4 %a0, i4 %a1) {
-; CHECK-LABEL: sub_fixedwidth_i4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: orr w9, w0, w1
-; CHECK-NEXT: and w8, w8, #0xe
-; CHECK-NEXT: sub w0, w9, w8, lsr #1
-; CHECK-NEXT: ret
- %or = or i4 %a0, %a1
- %xor = xor i4 %a0, %a1
- %srl = lshr i4 %xor, 1
- %res = sub i4 %or, %srl
- ret i4 %res
-}
-
-define <4 x i32> @sub_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: sub_fixedwidth_v4i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: eor v2.16b, v0.16b, v1.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ushr v1.4s, v2.4s, #1
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ret
- %or = or <4 x i32> %a0, %a1
- %xor = xor <4 x i32> %a0, %a1
- %srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
- %res = sub <4 x i32> %or, %srl
- ret <4 x i32> %res
-}
-
-
More information about the llvm-commits
mailing list