[llvm] 9632e15 - Match fixed width ISD::AVGFLOORS + ISD::AVGCEILS patterns (#86222)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 24 08:33:20 PDT 2024
Author: houndlord
Date: 2024-03-24T15:33:16Z
New Revision: 9632e1515c93453efc39752b1c9f32aedd358fbc
URL: https://github.com/llvm/llvm-project/commit/9632e1515c93453efc39752b1c9f32aedd358fbc
DIFF: https://github.com/llvm/llvm-project/commit/9632e1515c93453efc39752b1c9f32aedd358fbc.diff
LOG: Match fixed width ISD::AVGFLOORS + ISD::AVGCEILS patterns (#86222)
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/hadd-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e27a8bb8fdacda..05b4ce3aaa2cae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2529,20 +2529,28 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
-// Attempt to form avgceilu(A, B) from (A | B) - ((A ^ B) >> 1)
-static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
+// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
+static SDValue combineFixedwidthToAVGCEIL(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
SDLoc DL(N);
+ SDValue A, B;
+
if (TLI.isOperationLegal(ISD::AVGCEILU, VT)) {
- SDValue A, B;
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
m_SpecificInt(1))))) {
return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
}
}
+ if (TLI.isOperationLegal(ISD::AVGCEILS, VT)) {
+ if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
+ m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
+ }
+ }
return SDValue();
}
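For reference, the identity recognized here is the overflow-free ceiling average: ceil((A + B) / 2) == (A | B) - ((A ^ B) >> 1), using a logical shift for the unsigned form (AVGCEILU) and an arithmetic shift for the newly handled signed form (AVGCEILS). It follows from A + B == 2*(A | B) - (A ^ B). A minimal scalar C++ sketch of the signed form, with a made-up helper name, purely to illustrate the pattern the combine looks for:

  #include <cassert>
  #include <cstdint>

  // Ceiling average of two signed values without computing A + B,
  // so the intermediate sum can never overflow. Assumes arithmetic
  // right shift of negative values (guaranteed since C++20).
  int32_t avg_ceil_s(int32_t A, int32_t B) {
    return (A | B) - ((A ^ B) >> 1);
  }

  int main() {
    assert(avg_ceil_s(5, 2) == 4);   // ceil(3.5)
    assert(avg_ceil_s(-1, 0) == 0);  // ceil(-0.5) rounds toward +inf
  }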
@@ -2837,20 +2845,29 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return SDValue();
}
-// Attempt to form avgflooru(A, B) from (A & B) + ((A ^ B) >> 1)
-static SDValue combineFixedwidthToAVGFLOORU(SDNode *N, SelectionDAG &DAG) {
+// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
+static SDValue combineFixedwidthToAVGFLOOR(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
SDLoc DL(N);
+ SDValue A, B;
+
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
- SDValue A, B;
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
m_SpecificInt(1))))) {
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
}
}
+ if (TLI.isOperationLegal(ISD::AVGFLOORS, VT)) {
+ if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
+ m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
+ }
+ }
+
return SDValue();
}
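The floor counterpart relies on the analogous identity floor((A + B) / 2) == (A & B) + ((A ^ B) >> 1), which follows from the carry-free decomposition A + B == 2*(A & B) + (A ^ B); again, the only difference between the AVGFLOORU and AVGFLOORS forms is logical versus arithmetic shift. A matching scalar C++ sketch (helper name invented for illustration):

  #include <cassert>
  #include <cstdint>

  // Floor average of two signed values without forming A + B.
  // Assumes arithmetic right shift of negative values.
  int32_t avg_floor_s(int32_t A, int32_t B) {
    return (A & B) + ((A ^ B) >> 1);
  }

  int main() {
    assert(avg_floor_s(5, 2) == 3);    // floor(3.5)
    assert(avg_floor_s(-1, 0) == -1);  // floor(-0.5) rounds toward -inf
  }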
@@ -2869,8 +2886,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
- // Try to match AVGFLOORU fixedwidth pattern
- if (SDValue V = combineFixedwidthToAVGFLOORU(N, DAG))
+ // Try to match AVGFLOOR fixedwidth pattern
+ if (SDValue V = combineFixedwidthToAVGFLOOR(N, DAG))
return V;
// fold (a+b) -> (a|b) iff a and b share no bits.
@@ -3868,8 +3885,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
- // Try to match AVGCEILU fixedwidth pattern
- if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
+ // Try to match AVGCEIL fixedwidth pattern
+ if (SDValue V = combineFixedwidthToAVGCEIL(N, DAG))
return V;
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index e12502980790da..491bf40ea4aab0 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -341,6 +341,18 @@ define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %res
}
+define <8 x i16> @srhadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: srhadd_fixedwidth_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %or = or <8 x i16> %a0, %a1
+ %xor = xor <8 x i16> %a0, %a1
+ %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %res = sub <8 x i16> %or, %srl
+ ret <8 x i16> %res
+}
+
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_base:
; CHECK: // %bb.0:
@@ -879,6 +891,18 @@ define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %res
}
+define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: shadd_fixedwidth_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %and = and <8 x i16> %a0, %a1
+ %xor = xor <8 x i16> %a0, %a1
+ %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %res = add <8 x i16> %and, %srl
+ ret <8 x i16> %res
+}
+
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
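The two added tests check that the signed patterns now select the AArch64 halving-add instructions: shadd (signed halving add, the floor form) and srhadd (signed rounding halving add, the ceil form). The scalar identities behind both combines can also be brute-force checked over a small element type; a throwaway C++ harness along those lines, assuming arithmetic right shift of negative values:

  #include <cstdint>
  #include <cstdio>

  // Exhaustively compare both signed identities against reference
  // floor/ceil averages computed in a wider type, for all int8_t pairs.
  int main() {
    for (int a = -128; a <= 127; ++a) {
      for (int b = -128; b <= 127; ++b) {
        int8_t A = (int8_t)a, B = (int8_t)b;
        int floorRef = (a + b) >> 1;      // floor((a + b) / 2)
        int ceilRef = (a + b + 1) >> 1;   // ceil((a + b) / 2)
        int floorId = (int8_t)(A & B) + ((int8_t)(A ^ B) >> 1);
        int ceilId = (int8_t)(A | B) - ((int8_t)(A ^ B) >> 1);
        if (floorId != floorRef || ceilId != ceilRef) {
          std::printf("mismatch at a=%d b=%d\n", a, b);
          return 1;
        }
      }
    }
    std::puts("both identities hold for all int8_t pairs");
  }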