[llvm] 05f9877 - [X86] Add handling for shift_logical(select(icmp_uge(amt,BW),0,x),amt) -> avx2 shift(x,amt)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 12 07:22:57 PDT 2024
Author: Simon Pilgrim
Date: 2024-07-12T15:22:37+01:00
New Revision: 05f987743170cbd9fc97699c7a9b352055de7300
URL: https://github.com/llvm/llvm-project/commit/05f987743170cbd9fc97699c7a9b352055de7300
DIFF: https://github.com/llvm/llvm-project/commit/05f987743170cbd9fc97699c7a9b352055de7300.diff
LOG: [X86] Add handling for shift_logical(select(icmp_uge(amt,BW),0,x),amt) -> avx2 shift(x,amt)
We need to catch this; otherwise, pre-AVX512 targets will fold this to shift_logical(and(icmp_ult(amt,BW),x),amt) instead
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-shl.ll
llvm/test/CodeGen/X86/combine-srl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0a11e2134c708..a731541ca7778 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48042,6 +48042,14 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
SV == VT.getScalarSizeInBits()) {
return DAG.getNode(X86ISD::VSHLV, DL, VT, N00, N1);
}
+ // fold shl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psllv(x,amt)
+ if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
+ ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
+ ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
+ SV == VT.getScalarSizeInBits()) {
+ return DAG.getNode(X86ISD::VSHLV, DL, VT, N01, N1);
+ }
}
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
@@ -48176,6 +48184,14 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
SV == VT.getScalarSizeInBits()) {
return DAG.getNode(X86ISD::VSRLV, DL, VT, N00, N1);
}
+ // fold srl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psrlv(x,amt)
+ if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
+ ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
+ ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
+ SV == VT.getScalarSizeInBits()) {
+ return DAG.getNode(X86ISD::VSRLV, DL, VT, N01, N1);
+ }
}
// Only do this on the last DAG combine as it can interfere with other
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index c5ce1e0046ad0..8d8c1d26fc5ca 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -1044,19 +1044,10 @@ define <4 x i32> @combine_vec_shl_commuted_clamped(<4 x i32> %sh, <4 x i32> %amt
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: combine_vec_shl_commuted_clamped:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpminud %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: combine_vec_shl_commuted_clamped:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: combine_vec_shl_commuted_clamped:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
%shl = shl <4 x i32> %1, %amt
@@ -1112,4 +1103,4 @@ define <4 x i32> @combine_vec_shl_commuted_clamped1(<4 x i32> %sh, <4 x i32> %am
%shl = shl <4 x i32> %sh, %amt
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shl
ret <4 x i32> %1
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 78dcf6e743400..f2a9aa217f7ec 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -771,19 +771,10 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped(<4 x i32> %sh, <4 x i32> %am
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
; SSE41-NEXT: retq
;
-; AVX2-LABEL: combine_vec_lshr_commuted_clamped:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpminud %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: combine_vec_lshr_commuted_clamped:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: combine_vec_lshr_commuted_clamped:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
%shr = lshr <4 x i32> %1, %amt
@@ -854,4 +845,4 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped1(<4 x i32> %sh, <4 x i32> %a
%shr = lshr <4 x i32> %sh, %amt
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shr
ret <4 x i32> %1
-}
\ No newline at end of file
+}
More information about the llvm-commits
mailing list