[llvm] c2426fd - [X86][XOP] Add SimplifyDemandedVectorElts handling for xop shifts
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 12 04:43:27 PST 2022
Author: Simon Pilgrim
Date: 2022-01-12T12:43:13Z
New Revision: c2426fdcae629e04c85a5ad626838af3930e9fa7
URL: https://github.com/llvm/llvm-project/commit/c2426fdcae629e04c85a5ad626838af3930e9fa7
DIFF: https://github.com/llvm/llvm-project/commit/c2426fdcae629e04c85a5ad626838af3930e9fa7.diff
LOG: [X86][XOP] Add SimplifyDemandedVectorElts handling for xop shifts
Noticed while investigating how to improve funnel shift code generation.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-udiv.ll
llvm/test/CodeGen/X86/xop-shifts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 430d1c757f961..2f59b09ebd067 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40082,6 +40082,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));
break;
}
+ case X86ISD::VPSHA:
+ case X86ISD::VPSHL: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+ KnownZero = LHSZero;
+ break;
+ }
case X86ISD::KSHIFTL: {
SDValue Src = Op.getOperand(0);
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll
index 3c20d3156fedd..2906fe6da47b0 100644
--- a/llvm/test/CodeGen/X86/combine-udiv.ll
+++ b/llvm/test/CodeGen/X86/combine-udiv.ll
@@ -675,10 +675,11 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
;
; XOP-LABEL: combine_vec_udiv_nonuniform4:
; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15]
+; XOP-NEXT: movl $171, %eax
+; XOP-NEXT: vmovd %eax, %xmm1
+; XOP-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
+; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
; XOP-NEXT: movl $249, %eax
; XOP-NEXT: vmovd %eax, %xmm2
; XOP-NEXT: vpshlb %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/xop-shifts.ll b/llvm/test/CodeGen/X86/xop-shifts.ll
index 180bcba6cde29..335b3ab7ed6af 100644
--- a/llvm/test/CodeGen/X86/xop-shifts.ll
+++ b/llvm/test/CodeGen/X86/xop-shifts.ll
@@ -4,7 +4,6 @@
define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: demandedelts_vpshab:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: vpshab %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
@@ -18,7 +17,6 @@ define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: demandedelts_vpshld:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; CHECK-NEXT: vpshld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: retq
More information about the llvm-commits mailing list