[llvm] r354298 - [CGP] form usub with overflow from sub+icmp

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 18 15:33:05 PST 2019


Author: spatel
Date: Mon Feb 18 15:33:05 2019
New Revision: 354298

URL: http://llvm.org/viewvc/llvm-project?rev=354298&view=rev
Log:
[CGP] form usub with overflow from sub+icmp

The motivating x86 cases for forming the intrinsic are shown in PR31754 and PR40487:
https://bugs.llvm.org/show_bug.cgi?id=31754
https://bugs.llvm.org/show_bug.cgi?id=40487
...and those are shown in the IR test file and the x86 codegen file.

Matching the usubo pattern is harder than uaddo because we have 2 independent values rather than a def-use chain.
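
For illustration, a minimal before/after sketch of the transform (this mirrors the usubo_ult_i64 case in the
updated overflow-intrinsics.ll test below; value names are simplified):

  ; before CodeGenPrepare: independent sub and icmp on the same operands
  define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) {
    %s = sub i64 %x, %y
    store i64 %s, i64* %p
    %ov = icmp ult i64 %x, %y
    ret i1 %ov
  }

  ; after CodeGenPrepare on a target that opts in (x86): one intrinsic supplies
  ; both the math result and the overflow bit
  define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) {
    %1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
    %math = extractvalue { i64, i1 } %1, 0
    %ov = extractvalue { i64, i1 } %1, 1
    store i64 %math, i64* %p
    ret i1 %ov
  }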

This adds a TLI hook that should preserve the existing behavior for uaddo formation, but disables usubo
formation by default. Only x86 overrides that setting for now, although other targets will likely benefit
from forming usubo too.

Differential Revision: https://reviews.llvm.org/D57789

Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/X86/cgp-usubo.ll
    llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
    llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=354298&r1=354297&r2=354298&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Mon Feb 18 15:33:05 2019
@@ -2439,6 +2439,23 @@ public:
     return false;
   }
 
+  /// Try to convert math with an overflow comparison into the corresponding DAG
+  /// node operation. Targets may want to override this independently of whether
+  /// the operation is legal/custom for the given type because it may obscure
+  /// matching of other patterns.
+  virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+    // TODO: The default logic is inherited from code in CodeGenPrepare.
+    // The opcode should not make a difference by default?
+    if (Opcode != ISD::UADDO)
+      return false;
+
+    // Allow the transform as long as we have an integer type that is not
+    // obviously illegal and unsupported.
+    if (VT.isVector())
+      return false;
+    return VT.isSimple() || !isOperationExpand(Opcode, VT);
+  }
+
   // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
   // even if the vector itself has multiple uses.
   virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=354298&r1=354297&r2=354298&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Mon Feb 18 15:33:05 2019
@@ -1162,9 +1162,18 @@ static bool OptimizeNoopCopyExpression(C
 static void replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
                                         Instruction *InsertPt,
                                         Intrinsic::ID IID) {
+  Value *Arg0 = BO->getOperand(0);
+  Value *Arg1 = BO->getOperand(1);
+
+  // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+  if (BO->getOpcode() == Instruction::Add &&
+      IID == Intrinsic::usub_with_overflow) {
+    assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+    Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+  }
+
   IRBuilder<> Builder(InsertPt);
-  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, BO->getOperand(0),
-                                                BO->getOperand(1));
+  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
   Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
   BO->replaceAllUsesWith(Math);
@@ -1182,13 +1191,8 @@ static bool combineToUAddWithOverflow(Cm
   if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
     return false;
 
-  // Allow the transform as long as we have an integer type that is not
-  // obviously illegal and unsupported.
-  Type *Ty = Add->getType();
-  if (!isa<IntegerType>(Ty))
-    return false;
-  EVT CodegenVT = TLI.getValueType(DL, Ty);
-  if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT))
+  if (!TLI.shouldFormOverflowOp(ISD::UADDO,
+                                TLI.getValueType(DL, Add->getType())))
     return false;
 
   // We don't want to move around uses of condition values this late, so we
@@ -1210,6 +1214,64 @@ static bool combineToUAddWithOverflow(Cm
   return true;
 }
 
+static bool combineToUSubWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
+                                      const DataLayout &DL, bool &ModifiedDT) {
+  // Convert (A u> B) to (A u< B) to simplify pattern matching.
+  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_UGT) {
+    std::swap(A, B);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  // Convert special-case: (A == 0) is the same as (A u< 1).
+  if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+    B = ConstantInt::get(B->getType(), 1);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  if (Pred != ICmpInst::ICMP_ULT)
+    return false;
+
+  // Walk the users of a variable operand of a compare looking for a subtract or
+  // add with that same operand. Also match the 2nd operand of the compare to
+  // the add/sub, but that may be a negated constant operand of an add.
+  Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+  BinaryOperator *Sub = nullptr;
+  for (User *U : CmpVariableOperand->users()) {
+    // A - B, A u< B --> usubo(A, B)
+    if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+
+    // A + (-C), A u< C (canonicalized form of (sub A, C))
+    const APInt *CmpC, *AddC;
+    if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+        match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+  }
+  if (!Sub)
+    return false;
+
+  if (!TLI.shouldFormOverflowOp(ISD::USUBO,
+                                TLI.getValueType(DL, Sub->getType())))
+    return false;
+
+  // Pattern matched and profitability checked. Check dominance to determine the
+  // insertion point for an intrinsic that replaces the subtract and compare.
+  DominatorTree DT(*Sub->getFunction());
+  bool SubDominates = DT.dominates(Sub, Cmp);
+  if (!SubDominates && !DT.dominates(Cmp, Sub))
+    return false;
+  Instruction *InPt = SubDominates ? cast<Instruction>(Sub)
+                                   : cast<Instruction>(Cmp);
+  replaceMathCmpWithIntrinsic(Sub, Cmp, InPt, Intrinsic::usub_with_overflow);
+  // Reset callers - do not crash by iterating over a dead instruction.
+  ModifiedDT = true;
+  return true;
+}
+
 /// Sink the given CmpInst into user blocks to reduce the number of virtual
 /// registers that must be created and coalesced. This is a clear win except on
 /// targets with multiple condition code registers (PowerPC), where it might
@@ -1276,14 +1338,17 @@ static bool sinkCmpExpression(CmpInst *C
   return MadeChange;
 }
 
-static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
-                                  const DataLayout &DL) {
+static bool optimizeCmp(CmpInst *Cmp, const TargetLowering &TLI,
+                        const DataLayout &DL, bool &ModifiedDT) {
   if (sinkCmpExpression(Cmp, TLI))
     return true;
 
   if (combineToUAddWithOverflow(Cmp, TLI, DL))
     return true;
 
+  if (combineToUSubWithOverflow(Cmp, TLI, DL, ModifiedDT))
+    return true;
+
   return false;
 }
 
@@ -6770,8 +6835,8 @@ bool CodeGenPrepare::optimizeInst(Instru
     return false;
   }
 
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
+  if (auto *Cmp = dyn_cast<CmpInst>(I))
+    if (TLI && optimizeCmp(Cmp, *TLI, *DL, ModifiedDT))
       return true;
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=354298&r1=354297&r2=354298&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Feb 18 15:33:05 2019
@@ -4934,6 +4934,13 @@ bool X86TargetLowering::shouldScalarizeB
   return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
 }
 
+bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+  // TODO: Allow vectors?
+  if (VT.isVector())
+    return false;
+  return VT.isSimple() || !isOperationExpand(Opcode, VT);
+}
+
 bool X86TargetLowering::isCheapToSpeculateCttz() const {
   // Speculate cttz only if we can directly use TZCNT.
   return Subtarget.hasBMI();

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=354298&r1=354297&r2=354298&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Feb 18 15:33:05 2019
@@ -1071,6 +1071,11 @@ namespace llvm {
     /// supported.
     bool shouldScalarizeBinop(SDValue) const override;
 
+    /// Overflow nodes should get combined/lowered to optimal instructions
+    /// (they should allow eliminating explicit compares by getting flags from
+    /// math ops).
+    bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
+
     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                       unsigned AddrSpace) const override {
       // If we can replace more than 2 scalar stores, there will be a reduction

Modified: llvm/trunk/test/CodeGen/X86/cgp-usubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cgp-usubo.ll?rev=354298&r1=354297&r2=354298&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cgp-usubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cgp-usubo.ll Mon Feb 18 15:33:05 2019
@@ -7,8 +7,8 @@ define i1 @usubo_ult_i64(i64 %x, i64 %y,
 ; CHECK-LABEL: usubo_ult_i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    subq %rsi, %rdi
-; CHECK-NEXT:    movq %rdi, (%rdx)
 ; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    movq %rdi, (%rdx)
 ; CHECK-NEXT:    retq
   %s = sub i64 %x, %y
   store i64 %s, i64* %p
@@ -21,9 +21,8 @@ define i1 @usubo_ult_i64(i64 %x, i64 %y,
 define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) nounwind {
 ; CHECK-LABEL: usubo_ugt_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl %edi, %esi
-; CHECK-NEXT:    seta %al
 ; CHECK-NEXT:    subl %esi, %edi
+; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    movl %edi, (%rdx)
 ; CHECK-NEXT:    retq
   %ov = icmp ugt i32 %y, %x
@@ -39,8 +38,7 @@ define i1 @usubo_ugt_constant_op0_i8(i8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movb $42, %cl
 ; CHECK-NEXT:    subb %dil, %cl
-; CHECK-NEXT:    cmpb $42, %dil
-; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    movb %cl, (%rsi)
 ; CHECK-NEXT:    retq
   %s = sub i8 42, %x
@@ -54,10 +52,9 @@ define i1 @usubo_ugt_constant_op0_i8(i8
 define i1 @usubo_ult_constant_op0_i16(i16 %x, i16* %p) nounwind {
 ; CHECK-LABEL: usubo_ult_constant_op0_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $43, %ecx
-; CHECK-NEXT:    subl %edi, %ecx
-; CHECK-NEXT:    cmpw $43, %di
-; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movw $43, %cx
+; CHECK-NEXT:    subw %di, %cx
+; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    movw %cx, (%rsi)
 ; CHECK-NEXT:    retq
   %s = sub i16 43, %x
@@ -71,11 +68,9 @@ define i1 @usubo_ult_constant_op0_i16(i1
 define i1 @usubo_ult_constant_op1_i16(i16 %x, i16* %p) nounwind {
 ; CHECK-LABEL: usubo_ult_constant_op1_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    addl $-44, %ecx
-; CHECK-NEXT:    cmpw $44, %di
+; CHECK-NEXT:    subw $44, %di
 ; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    movw %cx, (%rsi)
+; CHECK-NEXT:    movw %di, (%rsi)
 ; CHECK-NEXT:    retq
   %s = add i16 %x, -44
   %ov = icmp ult i16 %x, 44
@@ -86,9 +81,8 @@ define i1 @usubo_ult_constant_op1_i16(i1
 define i1 @usubo_ugt_constant_op1_i8(i8 %x, i8* %p) nounwind {
 ; CHECK-LABEL: usubo_ugt_constant_op1_i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpb $45, %dil
+; CHECK-NEXT:    subb $45, %dil
 ; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    addb $-45, %dil
 ; CHECK-NEXT:    movb %dil, (%rsi)
 ; CHECK-NEXT:    retq
   %ov = icmp ugt i8 45, %x
@@ -102,11 +96,9 @@ define i1 @usubo_ugt_constant_op1_i8(i8
 define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) nounwind {
 ; CHECK-LABEL: usubo_eq_constant1_op1_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal -1(%rdi), %ecx
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movl %ecx, (%rsi)
+; CHECK-NEXT:    subl $1, %edi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    movl %edi, (%rsi)
 ; CHECK-NEXT:    retq
   %s = add i32 %x, -1
   %ov = icmp eq i32 %x, 0
@@ -124,17 +116,14 @@ define i1 @usubo_ult_sub_dominates_i64(i
 ; CHECK-NEXT:    testb $1, %cl
 ; CHECK-NEXT:    je .LBB7_2
 ; CHECK-NEXT:  # %bb.1: # %t
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    subq %rsi, %rax
-; CHECK-NEXT:    movq %rax, (%rdx)
-; CHECK-NEXT:    testb $1, %cl
-; CHECK-NEXT:    je .LBB7_2
-; CHECK-NEXT:  # %bb.3: # %end
-; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    subq %rsi, %rdi
 ; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    movq %rdi, (%rdx)
+; CHECK-NEXT:    testb $1, %cl
+; CHECK-NEXT:    jne .LBB7_3
 ; CHECK-NEXT:  .LBB7_2: # %f
 ; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:  .LBB7_3: # %end
 ; CHECK-NEXT:    retq
 entry:
   br i1 %cond, label %t, label %f

Modified: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=354298&r1=354297&r2=354298&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll Mon Feb 18 15:33:05 2019
@@ -16,11 +16,11 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    movl (%rdx), %eax
 ; GENERIC-NEXT:    movl 4(%rdx), %ebx
 ; GENERIC-NEXT:    decl %ecx
-; GENERIC-NEXT:    leaq 20(%rdx), %r14
+; GENERIC-NEXT:    leaq 20(%rdx), %r11
 ; GENERIC-NEXT:    movq _Te0@{{.*}}(%rip), %r9
 ; GENERIC-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; GENERIC-NEXT:    movq _Te3@{{.*}}(%rip), %r10
-; GENERIC-NEXT:    movq %rcx, %r11
+; GENERIC-NEXT:    movq %rcx, %r14
 ; GENERIC-NEXT:    jmp LBB0_1
 ; GENERIC-NEXT:    .p2align 4, 0x90
 ; GENERIC-NEXT:  LBB0_2: ## %bb1
@@ -29,14 +29,13 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    shrl $16, %ebx
 ; GENERIC-NEXT:    movzbl %bl, %ebx
 ; GENERIC-NEXT:    xorl (%r8,%rbx,4), %eax
-; GENERIC-NEXT:    xorl -4(%r14), %eax
+; GENERIC-NEXT:    xorl -4(%r11), %eax
 ; GENERIC-NEXT:    shrl $24, %edi
 ; GENERIC-NEXT:    movzbl %bpl, %ebx
 ; GENERIC-NEXT:    movl (%r10,%rbx,4), %ebx
 ; GENERIC-NEXT:    xorl (%r9,%rdi,4), %ebx
-; GENERIC-NEXT:    xorl (%r14), %ebx
-; GENERIC-NEXT:    decq %r11
-; GENERIC-NEXT:    addq $16, %r14
+; GENERIC-NEXT:    xorl (%r11), %ebx
+; GENERIC-NEXT:    addq $16, %r11
 ; GENERIC-NEXT:  LBB0_1: ## %bb
 ; GENERIC-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; GENERIC-NEXT:    movzbl %al, %edi
@@ -47,16 +46,16 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    movzbl %bpl, %ebp
 ; GENERIC-NEXT:    movl (%r8,%rbp,4), %ebp
 ; GENERIC-NEXT:    xorl (%r9,%rax,4), %ebp
-; GENERIC-NEXT:    xorl -12(%r14), %ebp
+; GENERIC-NEXT:    xorl -12(%r11), %ebp
 ; GENERIC-NEXT:    shrl $24, %ebx
 ; GENERIC-NEXT:    movl (%r10,%rdi,4), %edi
 ; GENERIC-NEXT:    xorl (%r9,%rbx,4), %edi
-; GENERIC-NEXT:    xorl -8(%r14), %edi
+; GENERIC-NEXT:    xorl -8(%r11), %edi
 ; GENERIC-NEXT:    movl %ebp, %eax
 ; GENERIC-NEXT:    shrl $24, %eax
 ; GENERIC-NEXT:    movl (%r9,%rax,4), %eax
-; GENERIC-NEXT:    testq %r11, %r11
-; GENERIC-NEXT:    jne LBB0_2
+; GENERIC-NEXT:    subq $1, %r14
+; GENERIC-NEXT:    jae LBB0_2
 ; GENERIC-NEXT:  ## %bb.3: ## %bb2
 ; GENERIC-NEXT:    shlq $4, %rcx
 ; GENERIC-NEXT:    andl $-16777216, %eax ## imm = 0xFF000000
@@ -99,27 +98,26 @@ define void @t(i8* nocapture %in, i8* no
 ; ATOM-NEXT:    ## kill: def $ecx killed $ecx def $rcx
 ; ATOM-NEXT:    movl (%rdx), %r15d
 ; ATOM-NEXT:    movl 4(%rdx), %eax
-; ATOM-NEXT:    leaq 20(%rdx), %r14
+; ATOM-NEXT:    leaq 20(%rdx), %r11
 ; ATOM-NEXT:    movq _Te0@{{.*}}(%rip), %r9
 ; ATOM-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; ATOM-NEXT:    movq _Te3@{{.*}}(%rip), %r10
 ; ATOM-NEXT:    decl %ecx
-; ATOM-NEXT:    movq %rcx, %r11
+; ATOM-NEXT:    movq %rcx, %r14
 ; ATOM-NEXT:    jmp LBB0_1
 ; ATOM-NEXT:    .p2align 4, 0x90
 ; ATOM-NEXT:  LBB0_2: ## %bb1
 ; ATOM-NEXT:    ## in Loop: Header=BB0_1 Depth=1
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $24, %edi
-; ATOM-NEXT:    decq %r11
-; ATOM-NEXT:    movzbl %al, %ebp
+; ATOM-NEXT:    movzbl %al, %eax
+; ATOM-NEXT:    xorl (%r8,%rax,4), %r15d
 ; ATOM-NEXT:    movzbl %bl, %eax
 ; ATOM-NEXT:    movl (%r10,%rax,4), %eax
-; ATOM-NEXT:    xorl (%r8,%rbp,4), %r15d
+; ATOM-NEXT:    xorl -4(%r11), %r15d
 ; ATOM-NEXT:    xorl (%r9,%rdi,4), %eax
-; ATOM-NEXT:    xorl -4(%r14), %r15d
-; ATOM-NEXT:    xorl (%r14), %eax
-; ATOM-NEXT:    addq $16, %r14
+; ATOM-NEXT:    xorl (%r11), %eax
+; ATOM-NEXT:    addq $16, %r11
 ; ATOM-NEXT:  LBB0_1: ## %bb
 ; ATOM-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; ATOM-NEXT:    movl %eax, %edi
@@ -132,15 +130,15 @@ define void @t(i8* nocapture %in, i8* no
 ; ATOM-NEXT:    movzbl %r15b, %edi
 ; ATOM-NEXT:    xorl (%r9,%rbp,4), %ebx
 ; ATOM-NEXT:    movl (%r10,%rdi,4), %edi
-; ATOM-NEXT:    xorl -12(%r14), %ebx
+; ATOM-NEXT:    xorl -12(%r11), %ebx
 ; ATOM-NEXT:    xorl (%r9,%rax,4), %edi
 ; ATOM-NEXT:    movl %ebx, %eax
-; ATOM-NEXT:    xorl -8(%r14), %edi
+; ATOM-NEXT:    xorl -8(%r11), %edi
 ; ATOM-NEXT:    shrl $24, %eax
 ; ATOM-NEXT:    movl (%r9,%rax,4), %r15d
-; ATOM-NEXT:    testq %r11, %r11
+; ATOM-NEXT:    subq $1, %r14
 ; ATOM-NEXT:    movl %edi, %eax
-; ATOM-NEXT:    jne LBB0_2
+; ATOM-NEXT:    jae LBB0_2
 ; ATOM-NEXT:  ## %bb.3: ## %bb2
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $8, %edi

Modified: llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll?rev=354298&r1=354297&r2=354298&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll Mon Feb 18 15:33:05 2019
@@ -175,10 +175,11 @@ define i1 @uaddo_i42_increment_illegal_t
 
 define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) {
 ; CHECK-LABEL: @usubo_ult_i64(
-; CHECK-NEXT:    [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    store i64 [[S]], i64* [[P:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %s = sub i64 %x, %y
   store i64 %s, i64* %p
@@ -190,10 +191,11 @@ define i1 @usubo_ult_i64(i64 %x, i64 %y,
 
 define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @usubo_ugt_i32(
-; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = sub i32 [[X]], [[Y]]
-; CHECK-NEXT:    store i32 [[S]], i32* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %ov = icmp ugt i32 %y, %x
   %s = sub i32 %x, %y
@@ -205,10 +207,11 @@ define i1 @usubo_ugt_i32(i32 %x, i32 %y,
 
 define i1 @usubo_ugt_constant_op0_i8(i8 %x, i8* %p) {
 ; CHECK-LABEL: @usubo_ugt_constant_op0_i8(
-; CHECK-NEXT:    [[S:%.*]] = sub i8 42, [[X:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i8 [[X]], 42
-; CHECK-NEXT:    store i8 [[S]], i8* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 42, i8 [[X:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %s = sub i8 42, %x
   %ov = icmp ugt i8 %x, 42
@@ -220,10 +223,11 @@ define i1 @usubo_ugt_constant_op0_i8(i8
 
 define i1 @usubo_ult_constant_op0_i16(i16 %x, i16* %p) {
 ; CHECK-LABEL: @usubo_ult_constant_op0_i16(
-; CHECK-NEXT:    [[S:%.*]] = sub i16 43, [[X:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ult i16 43, [[X]]
-; CHECK-NEXT:    store i16 [[S]], i16* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 43, i16 [[X:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %s = sub i16 43, %x
   %ov = icmp ult i16 43, %x
@@ -235,10 +239,11 @@ define i1 @usubo_ult_constant_op0_i16(i1
 
 define i1 @usubo_ult_constant_op1_i16(i16 %x, i16* %p) {
 ; CHECK-LABEL: @usubo_ult_constant_op1_i16(
-; CHECK-NEXT:    [[S:%.*]] = add i16 [[X:%.*]], -44
-; CHECK-NEXT:    [[OV:%.*]] = icmp ult i16 [[X]], 44
-; CHECK-NEXT:    store i16 [[S]], i16* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[X:%.*]], i16 44)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %s = add i16 %x, -44
   %ov = icmp ult i16 %x, 44
@@ -248,10 +253,11 @@ define i1 @usubo_ult_constant_op1_i16(i1
 
 define i1 @usubo_ugt_constant_op1_i8(i8 %x, i8* %p) {
 ; CHECK-LABEL: @usubo_ugt_constant_op1_i8(
-; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i8 45, [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = add i8 [[X]], -45
-; CHECK-NEXT:    store i8 [[S]], i8* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 45)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %ov = icmp ugt i8 45, %x
   %s = add i8 %x, -45
@@ -263,10 +269,11 @@ define i1 @usubo_ugt_constant_op1_i8(i8
 
 define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) {
 ; CHECK-LABEL: @usubo_eq_constant1_op1_i32(
-; CHECK-NEXT:    [[S:%.*]] = add i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[OV:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT:    store i32 [[S]], i32* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
   %s = add i32 %x, -1
   %ov = icmp eq i32 %x, 0
@@ -283,14 +290,15 @@ define i1 @usubo_ult_sub_dominates_i64(i
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; CHECK:       t:
-; CHECK-NEXT:    [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    store i64 [[S]], i64* [[P:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[END:%.*]], label [[F]]
 ; CHECK:       f:
 ; CHECK-NEXT:    ret i1 [[COND]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
-; CHECK-NEXT:    ret i1 [[OV]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
 entry:
   br i1 %cond, label %t, label %f
@@ -319,10 +327,11 @@ define i1 @usubo_ult_cmp_dominates_i64(i
 ; CHECK:       f:
 ; CHECK-NEXT:    ret i1 [[COND]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i64 [[X]], [[Y]]
-; CHECK-NEXT:    [[S:%.*]] = sub i64 [[X]], [[Y]]
-; CHECK-NEXT:    store i64 [[S]], i64* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[TMP0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
 ;
 entry:
   br i1 %cond, label %t, label %f
