[llvm] r337166 - [X86][AArch64][DAGCombine] Unfold 'check for [no] signed truncation' pattern
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 16 05:44:10 PDT 2018
Author: lebedevri
Date: Mon Jul 16 05:44:10 2018
New Revision: 337166
URL: http://llvm.org/viewvc/llvm-project?rev=337166&view=rev
Log:
[X86][AArch64][DAGCombine] Unfold 'check for [no] signed truncation' pattern
Summary:
[[ https://bugs.llvm.org/show_bug.cgi?id=38149 | PR38149 ]]
As discussed in https://reviews.llvm.org/D49179#1158957 and later,
the IR for 'check for [no] signed truncation' pattern can be improved:
https://rise4fun.com/Alive/gBf
^ that pattern will be produced by Implicit Integer Truncation sanitizer,
https://reviews.llvm.org/D48958 https://bugs.llvm.org/show_bug.cgi?id=21530
in signed case, therefore it is probably a good idea to improve it.
But the IR-optimal pattern does not lower efficiently, so we want to undo it.
This handles the simple pattern.
There is a second pattern with predicate and constants inverted.
NOTE: we do not check uses here; we always do the transform.
Reviewers: spatel, craig.topper, RKSimon, javed.absar
Reviewed By: spatel
Subscribers: kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D49266
Modified:
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll
llvm/trunk/test/CodeGen/X86/lack-of-signed-truncation-check.ll
llvm/trunk/test/CodeGen/X86/signed-truncation-check.ll
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Mon Jul 16 05:44:10 2018
@@ -519,6 +519,19 @@ public:
return false;
}
+ /// Should we transform the IR-optimal check for whether a given truncation
+ /// down into KeptBits would be truncating or not:
+ /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+ /// Into its more traditional form:
+ /// ((%x << C) a>> C) dstcond %x
+ /// Return true if we should transform.
+ /// Return false if there is no preference.
+ virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const {
+ // By default, assume that no target prefers the shift-based form.
+ return false;
+ }
+
/// Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
@@ -3667,6 +3680,11 @@ private:
SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+
+ SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Mon Jul 16 05:44:10 2018
@@ -1850,6 +1850,80 @@ SDValue TargetLowering::simplifySetCCWit
return SDValue();
}
+/// There are multiple IR patterns that could be checking whether certain
+/// truncation of a signed number would be lossy or not. The pattern which is
+/// best at IR level, may not lower optimally. Thus, we want to unfold it.
+/// We are looking for the following pattern: (KeptBits is a constant)
+/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
+/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
+/// We will unfold it into the natural trunc+sext pattern:
+/// ((%x << C) a>> C) dstcond %x
+/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+ EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ ISD::CondCode NewCond;
+ if (Cond == ISD::CondCode::SETULT)
+ NewCond = ISD::CondCode::SETEQ;
+ else if (Cond == ISD::CondCode::SETUGE)
+ NewCond = ISD::CondCode::SETNE;
+ else
+ return SDValue();
+
+ // We must be comparing with a constant.
+ ConstantSDNode *C1;
+ if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+ return SDValue();
+
+ // N0 should be: add %x, (1 << (KeptBits-1))
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // And we must be 'add'ing a constant.
+ ConstantSDNode *C01;
+ if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+ return SDValue();
+
+ SDValue X = N0->getOperand(0);
+ EVT XVT = X.getValueType();
+
+ // Validate constants ...
+
+ const APInt &I1 = C1->getAPIntValue();
+ const APInt &I01 = C01->getAPIntValue();
+ // Both of them must be power-of-two, and the constant from setcc is bigger.
+ if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
+ return SDValue();
+
+ // They are power-of-two, so which bit is set?
+ const unsigned KeptBits = I1.logBase2();
+ const unsigned KeptBitsMinusOne = I01.logBase2();
+
+ // The set bits must be adjacent: the setcc constant is twice the add constant.
+ if (KeptBits != (KeptBitsMinusOne + 1))
+ return SDValue();
+ assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+ // Let the target decide whether it wants the unfolded form for this case.
+ SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+ XVT, KeptBits))
+ return SDValue();
+
+ const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+ assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+ // Unfold into: ((%x << C) a>> C) cond %x
+ // Where 'cond' will be either 'eq' or 'ne'.
+ SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+ SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+ SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+ return T2;
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -2246,6 +2320,10 @@ SDValue TargetLowering::SimplifySetCC(EV
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
+
+ if (SDValue V =
+ optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+ return V;
}
// These simplifications apply to splat vectors as well.
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Mon Jul 16 05:44:10 2018
@@ -457,6 +457,23 @@ public:
return VT.getSizeInBits() >= 64; // vector 'bic'
}
+ bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const override {
+ // For vectors, we don't have a preference.
+ if (XVT.isVector())
+ return false;
+
+ auto VTIsOk = [](EVT VT) -> bool {
+ return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64;
+ };
+
+ // We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
+ // XVT will be larger than KeptBitsVT.
+ MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
+ return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
+ }
+
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Jul 16 05:44:10 2018
@@ -815,6 +815,24 @@ namespace llvm {
bool preferShiftsToClearExtremeBits(SDValue Y) const override;
+ bool
+ shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const override {
+ // For vectors, we don't have a preference.
+ if (XVT.isVector())
+ return false;
+
+ auto VTIsOk = [](EVT VT) -> bool {
+ return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64;
+ };
+
+ // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS supports.
+ // XVT will be larger than KeptBitsVT.
+ MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
+ return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
+ }
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
Modified: llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll Mon Jul 16 05:44:10 2018
@@ -183,10 +183,10 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nou
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i16_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
+; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: cmp w8, w0, uxth
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
@@ -196,9 +196,8 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nou
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: cmp w8, #0 // =0
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
@@ -209,9 +208,9 @@ define i1 @add_ugecmp_i32_i16(i32 %x) no
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
-; CHECK-NEXT: cmp w8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8
@@ -221,10 +220,8 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nou
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x80000000
-; CHECK-NEXT: add x8, x0, x8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@@ -235,9 +232,8 @@ define i1 @add_ugecmp_i64_i32(i64 %x) no
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768
-; CHECK-NEXT: lsr x8, x8, #16
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
@@ -248,9 +244,9 @@ define i1 @add_ugecmp_i64_i16(i64 %x) no
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128 // =128
-; CHECK-NEXT: cmp x8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8
Modified: llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll Mon Jul 16 05:44:10 2018
@@ -185,10 +185,10 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nou
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i16_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
+; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: cmp w8, w0, uxth
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -198,9 +198,9 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nou
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768
-; CHECK-NEXT: cmp w8, #16, lsl #12 // =65536
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
@@ -210,9 +210,9 @@ define i1 @add_ultcmp_i32_i16(i32 %x) no
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
-; CHECK-NEXT: cmp w8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
@@ -222,10 +222,8 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nou
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x80000000
-; CHECK-NEXT: add x8, x0, x8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@@ -236,9 +234,9 @@ define i1 @add_ultcmp_i64_i32(i64 %x) no
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768
-; CHECK-NEXT: cmp x8, #16, lsl #12 // =65536
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
@@ -248,9 +246,9 @@ define i1 @add_ultcmp_i64_i16(i64 %x) no
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128 // =128
-; CHECK-NEXT: cmp x8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8
Modified: llvm/trunk/test/CodeGen/X86/lack-of-signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lack-of-signed-truncation-check.ll?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lack-of-signed-truncation-check.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lack-of-signed-truncation-check.ll Mon Jul 16 05:44:10 2018
@@ -295,19 +295,17 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nou
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; X86-LABEL: add_ugecmp_i16_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: cmpl $255, %eax
-; X86-NEXT: seta %al
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i16_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: cmpl $255, %eax
-; X64-NEXT: seta %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpw %di, %ax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
@@ -317,17 +315,17 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nou
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; X86-LABEL: add_ugecmp_i32_i16:
; X86: # %bb.0:
-; X86-NEXT: movl $32768, %eax # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-NEXT: seta %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i32_i16:
; X64: # %bb.0:
-; X64-NEXT: addl $32768, %edi # imm = 0x8000
-; X64-NEXT: cmpl $65535, %edi # imm = 0xFFFF
-; X64-NEXT: seta %al
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp uge i32 %tmp0, 65536 ; 1U << 16
@@ -337,17 +335,17 @@ define i1 @add_ugecmp_i32_i16(i32 %x) no
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; X86-LABEL: add_ugecmp_i32_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $255, %eax
-; X86-NEXT: seta %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i32_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: cmpl $255, %edi
-; X64-NEXT: seta %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8
@@ -358,16 +356,15 @@ define i1 @add_ugecmp_i64_i32(i64 %x) no
; X86-LABEL: add_ugecmp_i64_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i32:
; X64: # %bb.0:
-; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000
-; X64-NEXT: shrq $32, %rdi
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@@ -378,24 +375,20 @@ define i1 @add_ugecmp_i64_i32(i64 %x) no
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; X86-LABEL: add_ugecmp_i64_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl $65535, %esi # imm = 0xFFFF
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setb %al
-; X86-NEXT: popl %esi
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i16:
; X64: # %bb.0:
-; X64-NEXT: addq $32768, %rdi # imm = 0x8000
-; X64-NEXT: cmpq $65535, %rdi # imm = 0xFFFF
-; X64-NEXT: seta %al
+; X64-NEXT: movswq %di, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp uge i64 %tmp0, 65536 ; 1U << 16
@@ -405,24 +398,20 @@ define i1 @add_ugecmp_i64_i16(i64 %x) no
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; X86-LABEL: add_ugecmp_i64_i8:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $128, %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl $255, %esi
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setb %al
-; X86-NEXT: popl %esi
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i8:
; X64: # %bb.0:
-; X64-NEXT: subq $-128, %rdi
-; X64-NEXT: cmpq $255, %rdi
-; X64-NEXT: seta %al
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8
Modified: llvm/trunk/test/CodeGen/X86/signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/signed-truncation-check.ll?rev=337166&r1=337165&r2=337166&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/signed-truncation-check.ll (original)
+++ llvm/trunk/test/CodeGen/X86/signed-truncation-check.ll Mon Jul 16 05:44:10 2018
@@ -299,19 +299,17 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nou
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; X86-LABEL: add_ultcmp_i16_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: cmpl $256, %eax # imm = 0x100
-; X86-NEXT: setb %al
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i16_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: cmpl $256, %eax # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpw %di, %ax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -321,17 +319,17 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nou
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; X86-LABEL: add_ultcmp_i32_i16:
; X86: # %bb.0:
-; X86-NEXT: movl $32768, %eax # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $65536, %eax # imm = 0x10000
-; X86-NEXT: setb %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i32_i16:
; X64: # %bb.0:
-; X64-NEXT: addl $32768, %edi # imm = 0x8000
-; X64-NEXT: cmpl $65536, %edi # imm = 0x10000
-; X64-NEXT: setb %al
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
@@ -341,17 +339,17 @@ define i1 @add_ultcmp_i32_i16(i32 %x) no
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; X86-LABEL: add_ultcmp_i32_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $256, %eax # imm = 0x100
-; X86-NEXT: setb %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i32_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: cmpl $256, %edi # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
@@ -362,16 +360,15 @@ define i1 @add_ultcmp_i64_i32(i64 %x) no
; X86-LABEL: add_ultcmp_i64_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i32:
; X64: # %bb.0:
-; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000
-; X64-NEXT: shrq $32, %rdi
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@@ -383,19 +380,19 @@ define i1 @add_ultcmp_i64_i16(i64 %x) no
; X86-LABEL: add_ultcmp_i64_i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: cmpl $65536, %ecx # imm = 0x10000
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: setb %al
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i16:
; X64: # %bb.0:
-; X64-NEXT: addq $32768, %rdi # imm = 0x8000
-; X64-NEXT: cmpq $65536, %rdi # imm = 0x10000
-; X64-NEXT: setb %al
+; X64-NEXT: movswq %di, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
@@ -406,19 +403,19 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nou
; X86-LABEL: add_ultcmp_i64_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $128, %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: cmpl $256, %ecx # imm = 0x100
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: setb %al
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i8:
; X64: # %bb.0:
-; X64-NEXT: subq $-128, %rdi
-; X64-NEXT: cmpq $256, %rdi # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8
More information about the llvm-commits
mailing list