[llvm] r311806 - [x86] Teach the backend to fold more read-modify-write memory operands
Chandler Carruth via llvm-commits
llvm-commits@lists.llvm.org
Fri Aug 25 15:50:52 PDT 2017
Author: chandlerc
Date: Fri Aug 25 15:50:52 2017
New Revision: 311806
URL: http://llvm.org/viewvc/llvm-project?rev=311806&view=rev
Log:
[x86] Teach the backend to fold more read-modify-write memory operands
to instructions.
These can't be reasonably matched in tablegen due to the handling of
flags, so we have to do this in C++ code. Historically we only did it
for `inc` and `dec`; this patch starts fleshing that out to cover more
interesting instructions. Notably, it handles transferring operands to
`add` and `sub`.
Currently this forces them into a register. The next patch will add
support for keeping immediate operands as immediates. Then I'll extend
this beyond just `add` and `sub`.
I'm not super thrilled by the repeated switches in the code, but
everything else I tried was really ugly or problematic.
Many thanks to Craig Topper for the suggestions about where to even
begin here and how to make this stuff work.
Differential Revision: https://reviews.llvm.org/D37130
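As a quick illustration (a minimal sketch distilled from the add64_reg_br
case in the new fold-rmw-ops.ll test below; the exact assembly follows its
CHECK lines), given IR of the form

  %load1 = load i64, i64* @g64
  %add = add nsw i64 %load1, %arg
  store i64 %add, i64* @g64
  %cond = icmp slt i64 %add, 0

we previously selected a load, an `addq` into a register, and a store; with
this patch we select a single `addq %rdi, g64(%rip)` whose EFLAGS feed the
following `js` branch directly.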
Added:
llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/X86/add.ll
llvm/trunk/test/CodeGen/X86/addcarry.ll
llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
llvm/trunk/test/CodeGen/X86/pr32659.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=311806&r1=311805&r2=311806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Aug 25 15:50:52 2017
@@ -1932,42 +1932,6 @@ static bool hasNoSignedComparisonUses(SD
return true;
}
-/// Get the appropriate X86 opcode for an in-memory arithmetic operation that
-/// also sets flags.
-///
-/// FIXME: This is essentially re-implemneting a subset of the patterns for
-/// these instructions. Instead, we should compute this from the patterns
-/// somehow.
-///
-/// FIXME: Currently we only support integer operations.
-///
-/// If there is no X86 opcode, returns none.
-static Optional<unsigned> getFusedLdStWithFlagsOpcode(EVT LdVT, unsigned Opc) {
- auto SelectSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
- unsigned Opc8) -> Optional<unsigned> {
- switch (LdVT.getSimpleVT().SimpleTy) {
- case MVT::i64:
- return Opc64;
- case MVT::i32:
- return Opc32;
- case MVT::i16:
- return Opc16;
- case MVT::i8:
- return Opc8;
- default:
- return None;
- }
- };
- switch (Opc) {
- default:
- return None;
- case X86ISD::DEC:
- return SelectSize(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- case X86ISD::INC:
- return SelectSize(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m);
- }
-}
-
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
@@ -2047,15 +2011,16 @@ static bool isFusableLoadOpStorePattern(
return true;
}
-// Change a chain of {load; incr or dec; store} of the same value into
-// a simple increment or decrement through memory of that value, if the
-// uses of the modified value and its address are suitable.
-// The DEC64m tablegen pattern is currently not able to match the case where
-// the EFLAGS on the original DEC are used. (This also applies to
-// {INC,DEC}X{64,32,16,8}.)
-// We'll need to improve tablegen to allow flags to be transferred from a
-// node in the pattern to the result node. probably with a new keyword
-// for example, we have this
+// Change a chain of {load; op; store} of the same value into a simple op
+// through memory of that value, if the uses of the modified value and its
+// address are suitable.
+//
+// The tablegen memory operand patterns are currently not able to match the
+// case where the EFLAGS on the original operation are used.
+//
+// To move this to tablegen, we'll need to improve tablegen to allow flags to
+// be transferred from a node in the pattern to the result node, probably with
+// a new keyword. For example, we have this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (implicit EFLAGS)]>;
@@ -2064,19 +2029,29 @@ static bool isFusableLoadOpStorePattern(
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (transferrable EFLAGS)]>;
//
-// FIXME: This should handle a wide range of operations which support RMW
-// memory operands, not just inc and dec.
+// Until then, we manually fold these and instruction select the operation
+// here.
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
SDValue StoredVal = StoreNode->getOperand(1);
unsigned Opc = StoredVal->getOpcode();
+ // Before we try to select anything, make sure this is memory operand size
+ // and opcode we can handle. Note that this must match the code below that
+ // actually lowers the opcodes.
EVT MemVT = StoreNode->getMemoryVT();
- if (!MemVT.isSimple())
+ if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
+ MemVT != MVT::i8)
return false;
- Optional<unsigned> NewOpc = getFusedLdStWithFlagsOpcode(MemVT, Opc);
- if (!NewOpc)
+ switch (Opc) {
+ default:
return false;
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ break;
+ }
LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
@@ -2089,12 +2064,57 @@ bool X86DAGToDAGISel::foldLoadStoreIntoM
Segment))
return false;
+ auto SelectOpcodeForSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
+ unsigned Opc8) {
+ switch (MemVT.getSimpleVT().SimpleTy) {
+ case MVT::i64:
+ return Opc64;
+ case MVT::i32:
+ return Opc32;
+ case MVT::i16:
+ return Opc16;
+ case MVT::i8:
+ return Opc8;
+ default:
+ llvm_unreachable("Invalid size!");
+ }
+ };
+
+ MachineSDNode *Result;
+ switch (Opc) {
+ case X86ISD::INC:
+ case X86ISD::DEC: {
+ unsigned NewOpc = Opc == X86ISD::INC
+ ? SelectOpcodeForSize(X86::INC64m, X86::INC32m,
+ X86::INC16m, X86::INC8m)
+ : SelectOpcodeForSize(X86::DEC64m, X86::DEC32m,
+ X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result =
+ CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+ break;
+ }
+ case X86ISD::ADD:
+ case X86ISD::SUB: {
+ unsigned NewOpc = Opc == X86ISD::ADD
+ ? SelectOpcodeForSize(X86::ADD64mr, X86::ADD32mr,
+ X86::ADD16mr, X86::ADD8mr)
+ : SelectOpcodeForSize(X86::SUB64mr, X86::SUB32mr,
+ X86::SUB16mr, X86::SUB8mr);
+ const SDValue Ops[] = {Base, Scale, Index,
+ Disp, Segment, StoredVal->getOperand(1),
+ InputChain};
+ Result =
+ CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+ break;
+ }
+ default:
+ llvm_unreachable("Invalid opcode!");
+ }
+
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- MachineSDNode *Result =
- CurDAG->getMachineNode(*NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
Result->setMemRefs(MemOp, MemOp + 2);
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
Modified: llvm/trunk/test/CodeGen/X86/add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/add.ll?rev=311806&r1=311805&r2=311806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/add.ll (original)
+++ llvm/trunk/test/CodeGen/X86/add.ll Fri Aug 25 15:50:52 2017
@@ -341,9 +341,8 @@ define void @test12(i64* inreg %a) nounw
; X32-LABEL: test12:
; X32: # BB#0: # %entry
; X32-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X32-NEXT: addl (%eax), %ecx
+; X32-NEXT: addl %ecx, (%eax)
; X32-NEXT: adcl $0, 4(%eax)
-; X32-NEXT: movl %ecx, (%eax)
; X32-NEXT: retl
;
; X64-LINUX-LABEL: test12:
@@ -366,9 +365,8 @@ define void @test13(i64* inreg %a) nounw
; X32-LABEL: test13:
; X32: # BB#0: # %entry
; X32-NEXT: movl $128, %ecx
-; X32-NEXT: addl (%eax), %ecx
+; X32-NEXT: addl %ecx, (%eax)
; X32-NEXT: adcl $0, 4(%eax)
-; X32-NEXT: movl %ecx, (%eax)
; X32-NEXT: retl
;
; X64-LINUX-LABEL: test13:
Modified: llvm/trunk/test/CodeGen/X86/addcarry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/addcarry.ll?rev=311806&r1=311805&r2=311806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/addcarry.ll (original)
+++ llvm/trunk/test/CodeGen/X86/addcarry.ll Fri Aug 25 15:50:52 2017
@@ -171,8 +171,7 @@ define void @muladd(%accumulator* nocapt
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: mulq %rsi
-; CHECK-NEXT: addq (%rdi), %rax
-; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: addq %rax, (%rdi)
; CHECK-NEXT: adcq 8(%rdi), %rdx
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: adcl $0, 16(%rdi)
Added: llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll?rev=311806&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll Fri Aug 25 15:50:52 2017
@@ -0,0 +1,420 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+@g64 = external global i64, align 8
+@g32 = external global i32, align 4
+@g16 = external global i16, align 2
+@g8 = external global i8, align 1
+
+declare void @a()
+declare void @b()
+
+define void @add64_imm_br() nounwind {
+; CHECK-LABEL: add64_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: addq %rax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB0_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB0_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %add = add nsw i64 %load1, 42
+ store i64 %add, i64* @g64
+ %cond = icmp slt i64 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add32_imm_br() nounwind {
+; CHECK-LABEL: add32_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: addl %eax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB1_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB1_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %add = add nsw i32 %load1, 42
+ store i32 %add, i32* @g32
+ %cond = icmp slt i32 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add16_imm_br() nounwind {
+; CHECK-LABEL: add16_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movw $42, %ax
+; CHECK-NEXT: addw %ax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB2_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB2_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %add = add nsw i16 %load1, 42
+ store i16 %add, i16* @g16
+ %cond = icmp slt i16 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add8_imm_br() nounwind {
+; CHECK-LABEL: add8_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $42, %al
+; CHECK-NEXT: addb %al, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB3_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB3_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %add = add nsw i8 %load1, 42
+ store i8 %add, i8* @g8
+ %cond = icmp slt i8 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: add64_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addq %rdi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB4_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB4_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %add = add nsw i64 %load1, %arg
+ store i64 %add, i64* @g64
+ %cond = icmp slt i64 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: add32_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addl %edi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB5_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB5_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %add = add nsw i32 %load1, %arg
+ store i32 %add, i32* @g32
+ %cond = icmp slt i32 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: add16_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addw %di, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB6_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB6_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %add = add nsw i16 %load1, %arg
+ store i16 %add, i16* @g16
+ %cond = icmp slt i16 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: add8_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addb %dil, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB7_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB7_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %add = add nsw i8 %load1, %arg
+ store i8 %add, i8* @g8
+ %cond = icmp slt i8 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub64_imm_br() nounwind {
+; CHECK-LABEL: sub64_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movq $-42, %rax
+; CHECK-NEXT: addq %rax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB8_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB8_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %sub = sub nsw i64 %load1, 42
+ store i64 %sub, i64* @g64
+ %cond = icmp slt i64 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub32_imm_br() nounwind {
+; CHECK-LABEL: sub32_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $-42, %eax
+; CHECK-NEXT: addl %eax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB9_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB9_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %sub = sub nsw i32 %load1, 42
+ store i32 %sub, i32* @g32
+ %cond = icmp slt i32 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub16_imm_br() nounwind {
+; CHECK-LABEL: sub16_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movw $-42, %ax
+; CHECK-NEXT: addw %ax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB10_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB10_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %sub = sub nsw i16 %load1, 42
+ store i16 %sub, i16* @g16
+ %cond = icmp slt i16 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub8_imm_br() nounwind {
+; CHECK-LABEL: sub8_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $-42, %al
+; CHECK-NEXT: addb %al, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB11_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB11_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %sub = sub nsw i8 %load1, 42
+ store i8 %sub, i8* @g8
+ %cond = icmp slt i8 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: sub64_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subq %rdi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB12_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB12_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %sub = sub nsw i64 %load1, %arg
+ store i64 %sub, i64* @g64
+ %cond = icmp slt i64 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: sub32_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subl %edi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB13_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB13_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %sub = sub nsw i32 %load1, %arg
+ store i32 %sub, i32* @g32
+ %cond = icmp slt i32 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: sub16_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subw %di, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB14_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB14_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %sub = sub nsw i16 %load1, %arg
+ store i16 %sub, i16* @g16
+ %cond = icmp slt i16 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: sub8_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subb %dil, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB15_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB15_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %sub = sub nsw i8 %load1, %arg
+ store i8 %sub, i8* @g8
+ %cond = icmp slt i8 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll?rev=311806&r1=311805&r2=311806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll (original)
+++ llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll Fri Aug 25 15:50:52 2017
@@ -65,10 +65,9 @@ exit2:
define i1 @plus_forty_two() nounwind {
; CHECK32-LABEL: plus_forty_two:
; CHECK32: # BB#0: # %entry
-; CHECK32-NEXT: movl L, %ecx
; CHECK32-NEXT: movb M, %al
-; CHECK32-NEXT: addl $42, %ecx
-; CHECK32-NEXT: movl %ecx, L
+; CHECK32-NEXT: movl $42, %ecx
+; CHECK32-NEXT: addl %ecx, L
; CHECK32-NEXT: jne .LBB1_2
; CHECK32-NEXT: # BB#1: # %entry
; CHECK32-NEXT: andb $8, %al
@@ -82,10 +81,9 @@ define i1 @plus_forty_two() nounwind {
;
; CHECK64-LABEL: plus_forty_two:
; CHECK64: # BB#0: # %entry
-; CHECK64-NEXT: movl {{.*}}(%rip), %ecx
; CHECK64-NEXT: movb {{.*}}(%rip), %al
-; CHECK64-NEXT: addl $42, %ecx
-; CHECK64-NEXT: movl %ecx, {{.*}}(%rip)
+; CHECK64-NEXT: movl $42, %ecx
+; CHECK64-NEXT: addl %ecx, {{.*}}(%rip)
; CHECK64-NEXT: jne .LBB1_2
; CHECK64-NEXT: # BB#1: # %entry
; CHECK64-NEXT: andb $8, %al
@@ -165,10 +163,9 @@ exit2:
define i1 @minus_forty_two() nounwind {
; CHECK32-LABEL: minus_forty_two:
; CHECK32: # BB#0: # %entry
-; CHECK32-NEXT: movl L, %ecx
; CHECK32-NEXT: movb M, %al
-; CHECK32-NEXT: addl $-42, %ecx
-; CHECK32-NEXT: movl %ecx, L
+; CHECK32-NEXT: movl $-42, %ecx
+; CHECK32-NEXT: addl %ecx, L
; CHECK32-NEXT: jne .LBB3_2
; CHECK32-NEXT: # BB#1: # %entry
; CHECK32-NEXT: andb $8, %al
@@ -182,10 +179,9 @@ define i1 @minus_forty_two() nounwind {
;
; CHECK64-LABEL: minus_forty_two:
; CHECK64: # BB#0: # %entry
-; CHECK64-NEXT: movl {{.*}}(%rip), %ecx
; CHECK64-NEXT: movb {{.*}}(%rip), %al
-; CHECK64-NEXT: addl $-42, %ecx
-; CHECK64-NEXT: movl %ecx, {{.*}}(%rip)
+; CHECK64-NEXT: movl $-42, %ecx
+; CHECK64-NEXT: addl %ecx, {{.*}}(%rip)
; CHECK64-NEXT: jne .LBB3_2
; CHECK64-NEXT: # BB#1: # %entry
; CHECK64-NEXT: andb $8, %al
Modified: llvm/trunk/test/CodeGen/X86/pr32659.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32659.ll?rev=311806&r1=311805&r2=311806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32659.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32659.ll Fri Aug 25 15:50:52 2017
@@ -50,10 +50,10 @@ define void @fn2() nounwind optsize {
; CHECK-NEXT: sarl $31, %eax
; CHECK-NEXT: andl %eax, e+4
; CHECK-NEXT: decl g
-; CHECK-NEXT: movl f, %eax
-; CHECK-NEXT: addl $1, %eax
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: incl %eax
+; CHECK-NEXT: addl %eax, f
; CHECK-NEXT: adcl $0, f+4
-; CHECK-NEXT: movl %eax, f
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl