[llvm] c7bb366 - [X86] SimplifyDemandedBitsForTargetNode - fold MOVMSK(YMM) -> MOVMSK(XMM)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 28 06:43:22 PST 2022
Author: Simon Pilgrim
Date: 2022-01-28T14:42:53Z
New Revision: c7bb3665a1c4a06754e486d8567182821fa32b55
URL: https://github.com/llvm/llvm-project/commit/c7bb3665a1c4a06754e486d8567182821fa32b55
DIFF: https://github.com/llvm/llvm-project/commit/c7bb3665a1c4a06754e486d8567182821fa32b55.diff
LOG: [X86] SimplifyDemandedBitsForTargetNode - fold MOVMSK(YMM) -> MOVMSK(XMM)
If we don't demand any bits from the upper elements of the 256-bit source vector, then just perform the MOVMSK on its lower 128-bit half instead.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-movmsk-avx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 99ef69d551c3f..450e5947594bb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41043,6 +41043,13 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ // See if we only demand bits from the lower 128-bit vector.
+ if (SrcVT.is256BitVector() &&
+ OriginalDemandedBits.getActiveBits() <= (NumElts / 2)) {
+ SDValue NewSrc = extract128BitVector(Src, 0, TLO.DAG, SDLoc(Src));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
+ }
+
// Only demand the vector elements of the sign bits we need.
APInt KnownUndef, KnownZero;
APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index 9e393a76a5b38..ca0e8db5db03e 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -153,12 +153,10 @@ define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
define i32 @movmskps_demanded_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: movmskps_demanded_concat_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT: vmovmskps %ymm0, %eax
+; CHECK-NEXT: vmovmskps %xmm0, %eax
; CHECK-NEXT: andl $3, %eax
; CHECK-NEXT: negl %eax
; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %1)
More information about the llvm-commits mailing list