[llvm] 5fc619b - [DAG] Update ISD::AVG folds to use hasOperation to allow Custom matching prior to legalization
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 03:44:05 PDT 2024
Author: Simon Pilgrim
Date: 2024-03-26T10:41:07Z
New Revision: 5fc619b5ee227ab4d22319dad8d2ba6fc1660091
URL: https://github.com/llvm/llvm-project/commit/5fc619b5ee227ab4d22319dad8d2ba6fc1660091
DIFF: https://github.com/llvm/llvm-project/commit/5fc619b5ee227ab4d22319dad8d2ba6fc1660091.diff
LOG: [DAG] Update ISD::AVG folds to use hasOperation to allow Custom matching prior to legalization
Fixes an issue where AVX1 targets weren't matching 256-bit AVGCEILU cases.
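
For context, the fold rewrites the fixed-width expansion of an unsigned ceiling
average back into ISD::AVGCEILU. A minimal LLVM IR sketch of that pattern for a
256-bit type (illustrative only; the function name is not taken from the test file):

  ; avgceilu(a, b) == (a | b) - ((a ^ b) >> 1)
  define <16 x i16> @avgceilu_fixed_pattern(<16 x i16> %a0, <16 x i16> %a1) {
    %or  = or <16 x i16> %a0, %a1
    %xor = xor <16 x i16> %a0, %a1
    %srl = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    %res = sub <16 x i16> %or, %srl
    ret <16 x i16> %res
  }

On AVX1 the 256-bit AVGCEILU nodes are presumably marked Custom (lowered by
splitting into two 128-bit halves, as the updated vpavgw/vpavgb test output below
shows), so the old isOperationLegal check rejected them; hasOperation also accepts
Custom operations prior to legalization.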
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/avgceilu.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6f1b1080cc8801..6f7a2a8a4c1493 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -609,6 +609,9 @@ namespace {
SDValue &CC, bool MatchStrict = false) const;
bool isOneUseSetCC(SDValue N) const;
+ SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
+ SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
+
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
@@ -2530,26 +2533,22 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
}
// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
-static SDValue combineFixedwidthToAVGCEIL(SDNode *N, SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
- SDLoc DL(N);
SDValue A, B;
- if (TLI.isOperationLegal(ISD::AVGCEILU, VT)) {
- if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
- m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
- m_SpecificInt(1))))) {
- return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
- }
+ if (hasOperation(ISD::AVGCEILU, VT) &&
+ sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
+ m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
}
- if (TLI.isOperationLegal(ISD::AVGCEILS, VT)) {
- if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
- m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
- m_SpecificInt(1))))) {
- return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
- }
+ if (hasOperation(ISD::AVGCEILS, VT) &&
+ sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
+ m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
}
return SDValue();
}
@@ -2846,26 +2845,22 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
}
// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
-static SDValue combineFixedwidthToAVGFLOOR(SDNode *N, SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
- SDLoc DL(N);
SDValue A, B;
- if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
- if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
- m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
- m_SpecificInt(1))))) {
- return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
- }
+ if (hasOperation(ISD::AVGFLOORU, VT) &&
+ sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
+ m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
}
- if (TLI.isOperationLegal(ISD::AVGFLOORS, VT)) {
- if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
- m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
- m_SpecificInt(1))))) {
- return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
- }
+ if (hasOperation(ISD::AVGFLOORS, VT) &&
+ sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
+ m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
}
return SDValue();
@@ -2887,7 +2882,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return V;
// Try to match AVGFLOOR fixedwidth pattern
- if (SDValue V = combineFixedwidthToAVGFLOOR(N, DAG))
+ if (SDValue V = foldAddToAvg(N, DL))
return V;
// fold (a+b) -> (a|b) iff a and b share no bits.
@@ -3886,7 +3881,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return V;
// Try to match AVGCEIL fixedwidth pattern
- if (SDValue V = combineFixedwidthToAVGCEIL(N, DAG))
+ if (SDValue V = foldSubToAvg(N, DL))
return V;
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
diff --git a/llvm/test/CodeGen/X86/avgceilu.ll b/llvm/test/CodeGen/X86/avgceilu.ll
index 3a74fca2377370..dee1a5a720f90f 100644
--- a/llvm/test/CodeGen/X86/avgceilu.ll
+++ b/llvm/test/CodeGen/X86/avgceilu.ll
@@ -319,18 +319,11 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
;
; AVX1-LABEL: test_fixed_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm2
-; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT: vpsubb %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpavgb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_fixed_v32i8:
@@ -392,15 +385,11 @@ define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
;
; AVX1-LABEL: test_fixed_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm2
-; AVX1-NEXT: vxorps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT: vpsubw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpavgw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_fixed_v16i16:
@@ -959,29 +948,16 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
;
; AVX1-LABEL: test_fixed_v64i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm4
-; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm5
-; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm6
-; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3
-; AVX1-NEXT: vpsubb %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpsubb %xmm6, %xmm5, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
-; AVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpsubb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpavgb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpavgb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpavgb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpavgb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_fixed_v64i8:
@@ -1054,24 +1030,16 @@ define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) {
;
; AVX1-LABEL: test_fixed_v32i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm4
-; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm5
-; AVX1-NEXT: vxorps %ymm0, %ymm2, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm3, %ymm1
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
-; AVX1-NEXT: vpsubw %xmm0, %xmm6, %xmm0
-; AVX1-NEXT: vpsubw %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
-; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpavgw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpavgw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpavgw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpavgw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_fixed_v32i16: