[llvm] r323048 - [X86] Add an override of targetShrinkDemandedConstant to limit the damage that shrinkdemandedbits can do to zext_in_reg operations
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 20 10:50:09 PST 2018
Author: ctopper
Date: Sat Jan 20 10:50:09 2018
New Revision: 323048
URL: http://llvm.org/viewvc/llvm-project?rev=323048&view=rev
Log:
[X86] Add an override of targetShrinkDemandedConstant to limit the damage that shrinkdemandedbits can do to zext_in_reg operations
Summary:
This patch adds an implementation of targetShrinkDemandedConstant that tries to keep shrinkdemandedbits from removing bits that would otherwise have been recognized as a movzx.
We still need a follow-up patch to stop moving ands across srl if the and could be represented as a movzx before the shift but not after. I think this should help with some of the cases that D42088 ended up removing during isel.
Reviewers: spatel, RKSimon
Reviewed By: spatel
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D42265
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
llvm/trunk/test/CodeGen/X86/3addr-or.ll
llvm/trunk/test/CodeGen/X86/popcnt.ll
llvm/trunk/test/CodeGen/X86/pr21792.ll
llvm/trunk/test/CodeGen/X86/zext-demanded.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=323048&r1=323047&r2=323048&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 20 10:50:09 2018
@@ -27884,6 +27884,65 @@ X86TargetLowering::EmitInstrWithCustomIn
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
+bool
+X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded,
+ TargetLoweringOpt &TLO) const {
+ // Only optimize Ands to prevent shrinking a constant that could be
+ // matched by movzx.
+ if (Op.getOpcode() != ISD::AND)
+ return false;
+
+ EVT VT = Op.getValueType();
+
+ // Ignore vectors.
+ if (VT.isVector())
+ return false;
+
+ unsigned Size = VT.getSizeInBits();
+
+ // Make sure the RHS really is a constant.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C)
+ return false;
+
+ const APInt &Mask = C->getAPIntValue();
+
+ // Clear all non-demanded bits initially.
+ APInt ShrunkMask = Mask & Demanded;
+
+ // Find the width of the shrunk mask.
+ unsigned Width = ShrunkMask.getActiveBits();
+
+ // If the mask is all 0s there's nothing to do here.
+ if (Width == 0)
+ return false;
+
+ // Find the next power of 2 width, rounding up to a byte.
+ Width = PowerOf2Ceil(std::max(Width, 8U));
+ // Truncate the width to size to handle illegal types.
+ Width = std::min(Width, Size);
+
+ // Calculate a possible zero extend mask for this constant.
+ APInt ZeroExtendMask = APInt::getLowBitsSet(Size, Width);
+
+ // If we aren't changing the mask, just return true to keep it and prevent
+ // the caller from optimizing.
+ if (ZeroExtendMask == Mask)
+ return true;
+
+ // Make sure the bits in the ZeroExtendMask are also set in the original mask.
+ // TODO: We should be able to set bits that aren't demanded too.
+ if (!ZeroExtendMask.isSubsetOf(Mask))
+ return false;
+
+ // Replace the constant with the zero extend mask.
+ SDLoc DL(Op);
+ SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+}
+
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=323048&r1=323047&r2=323048&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Jan 20 10:50:09 2018
@@ -835,6 +835,9 @@ namespace llvm {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const override;
+
/// Determine which of the bits specified in Mask are known to be either
/// zero or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=323048&r1=323047&r2=323048&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Sat Jan 20 10:50:09 2018
@@ -1514,6 +1514,10 @@ def : Pat<(i8 (trunc GR16:$src)),
(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
Requires<[In64BitMode]>;
+def immff00_ffff : ImmLeaf<i32, [{
+ return Imm >= 0xff00 && Imm <= 0xffff;
+}]>;
+
// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
@@ -1534,7 +1538,7 @@ def : Pat<(i32 (anyext (srl_su GR16:$src
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
// h-register tricks.
Modified: llvm/trunk/test/CodeGen/X86/3addr-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3addr-or.ll?rev=323048&r1=323047&r2=323048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3addr-or.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3addr-or.ll Sat Jan 20 10:50:09 2018
@@ -14,16 +14,18 @@ define i32 @test1(i32 %x) nounwind ssp {
ret i32 %t1
}
+; This test no longer requires or to be converted to 3 addr form because we are
+; able to use a zero extend instead of an 'and' which gives the register
+; allocator freedom.
define i64 @test2(i8 %A, i8 %B) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def %esi killed %esi def %rsi
; CHECK-NEXT: # kill: def %edi killed %edi def %rdi
; CHECK-NEXT: shll $4, %edi
; CHECK-NEXT: andl $48, %edi
-; CHECK-NEXT: andl $240, %esi
-; CHECK-NEXT: shrq $4, %rsi
-; CHECK-NEXT: leaq (%rsi,%rdi), %rax
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: shrq $4, %rax
+; CHECK-NEXT: orq %rdi, %rax
; CHECK-NEXT: retq
%C = zext i8 %A to i64
%D = shl i64 %C, 4
Modified: llvm/trunk/test/CodeGen/X86/popcnt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/popcnt.ll?rev=323048&r1=323047&r2=323048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/popcnt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/popcnt.ll Sat Jan 20 10:50:09 2018
@@ -71,7 +71,6 @@ define i16 @cnt16(i16 %x) nounwind readn
; X32-NEXT: andl $13107, %eax # imm = 0x3333
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $-16, %ecx
; X32-NEXT: shrl $4, %ecx
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: andl $3855, %ecx # imm = 0xF0F
@@ -94,7 +93,6 @@ define i16 @cnt16(i16 %x) nounwind readn
; X64-NEXT: andl $13107, %edi # imm = 0x3333
; X64-NEXT: addl %eax, %edi
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $-16, %eax
; X64-NEXT: shrl $4, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: andl $3855, %eax # imm = 0xF0F
Modified: llvm/trunk/test/CodeGen/X86/pr21792.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr21792.ll?rev=323048&r1=323047&r2=323048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr21792.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr21792.ll Sat Jan 20 10:50:09 2018
@@ -12,19 +12,18 @@ define void @func(<4 x float> %vx) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: pextrq $1, %xmm0, %rdx
-; CHECK-NEXT: movq %rdx, %rcx
-; CHECK-NEXT: shrq $32, %rcx
-; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: movq %rax, %r9
+; CHECK-NEXT: pextrq $1, %xmm0, %rax
+; CHECK-NEXT: movzwl %ax, %ecx
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: movq %xmm0, %rdx
+; CHECK-NEXT: movzwl %dx, %r8d
+; CHECK-NEXT: movq %rdx, %r9
; CHECK-NEXT: shrq $32, %r9
-; CHECK-NEXT: andl $2032, %eax # imm = 0x7F0
-; CHECK-NEXT: leaq stuff(%rax), %rdi
+; CHECK-NEXT: leaq stuff(%r8), %rdi
; CHECK-NEXT: leaq stuff(%r9), %rsi
-; CHECK-NEXT: andl $2032, %edx # imm = 0x7F0
-; CHECK-NEXT: leaq stuff(%rdx), %rdx
-; CHECK-NEXT: leaq stuff(%rcx), %rcx
-; CHECK-NEXT: leaq stuff+8(%rax), %r8
+; CHECK-NEXT: leaq stuff(%rcx), %rdx
+; CHECK-NEXT: leaq stuff(%rax), %rcx
+; CHECK-NEXT: leaq stuff+8(%r8), %r8
; CHECK-NEXT: leaq stuff+8(%r9), %r9
; CHECK-NEXT: callq toto
; CHECK-NEXT: popq %rax
Modified: llvm/trunk/test/CodeGen/X86/zext-demanded.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-demanded.ll?rev=323048&r1=323047&r2=323048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/zext-demanded.ll (original)
+++ llvm/trunk/test/CodeGen/X86/zext-demanded.ll Sat Jan 20 10:50:09 2018
@@ -5,25 +5,22 @@
; demanded bits shortcomings.
; The backend will insert a zext to promote the shift to i32.
-; TODO: we should be able to use movzx here.
define i16 @test1(i16 %x) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $65534, %edi # imm = 0xFFFE
-; CHECK-NEXT: shrl %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: # kill: def %ax killed %ax killed %eax
; CHECK-NEXT: retq
%y = lshr i16 %x, 1
ret i16 %y
}
-; TODO: we should be able to use movzx here.
define i32 @test2(i32 %x) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $65534, %edi # imm = 0xFFFE
-; CHECK-NEXT: shrl %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: shrl %eax
; CHECK-NEXT: retq
%y = and i32 %x, 65535
%z = lshr i32 %y, 1
More information about the llvm-commits
mailing list