[llvm] e38fc14 - [X86] Introduce x86-cmov-converter-force-all

Amir Ayupov via llvm-commits <llvm-commits@lists.llvm.org>
Thu Feb 24 10:47:35 PST 2022


Author: Amir Ayupov
Date: 2022-02-24T10:47:22-08:00
New Revision: e38fc14c43b00f0606ad31a6df9dad1c54413afc

URL: https://github.com/llvm/llvm-project/commit/e38fc14c43b00f0606ad31a6df9dad1c54413afc
DIFF: https://github.com/llvm/llvm-project/commit/e38fc14c43b00f0606ad31a6df9dad1c54413afc.diff

LOG: [X86] Introduce x86-cmov-converter-force-all

Introduce an option to expand all CMOV groups into hammocks (compare-and-branch
sequences), matching GCC's `-fno-if-conversion2` flag. The motivation is to
leave CMOV-conversion opportunities to a binary optimizer, which can make the
decision based on branch misprediction rates (available e.g. via Intel's LBR).
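
For illustration only (not part of this commit): a minimal sketch of how the new
hidden option might be exercised from C. The file and function names below are
made up; the option name comes from the patch and its test, and the -mllvm
pass-through is the usual clang mechanism for forwarding backend cl::opt flags.

    /* cmov_demo.c -- hypothetical example, not from the patch.
     * At -O2 the select below is normally lowered to a CMOV on x86-64:
     *   clang -O2 -S cmov_demo.c
     * Forwarding the new hidden backend option expands the CMOV into a
     * compare-and-branch hammock instead, leaving the conversion decision
     * to a post-link binary optimizer:
     *   clang -O2 -mllvm -x86-cmov-converter-force-all=true -S cmov_demo.c
     */
    int clamp_max(int x, int limit) {
      return x > limit ? limit : x; /* typically becomes a cmov at -O2 */
    }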

Reviewed By: MaskRay, skan

Differential Revision: https://reviews.llvm.org/D119777

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86CmovConversion.cpp
    llvm/test/CodeGen/X86/x86-cmov-converter.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 96d3d1390a595..499eca959c8c2 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -97,6 +97,11 @@ static cl::opt<bool> ForceMemOperand(
     cl::desc("Convert cmovs to branches whenever they have memory operands."),
     cl::init(true), cl::Hidden);
 
+static cl::opt<bool> ForceAll(
+    "x86-cmov-converter-force-all",
+    cl::desc("Convert all cmovs to branches."),
+    cl::init(false), cl::Hidden);
+
 namespace {
 
 /// Converts X86 cmov instructions into branches when profitable.
@@ -174,11 +179,11 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
   TSchedModel.init(&STI);
 
   // Before we handle the more subtle cases of register-register CMOVs inside
-  // of potentially hot loops, we want to quickly remove all CMOVs with
-  // a memory operand. The CMOV will risk a stall waiting for the load to
-  // complete that speculative execution behind a branch is better suited to
-  // handle on modern x86 chips.
-  if (ForceMemOperand) {
+  // of potentially hot loops, we want to quickly remove all CMOVs (ForceAll) or
+  // the ones with a memory operand (ForceMemOperand option). The latter CMOV
+  // will risk a stall waiting for the load to complete that speculative
+  // execution behind a branch is better suited to handle on modern x86 chips.
+  if (ForceMemOperand || ForceAll) {
     CmovGroups AllCmovGroups;
     SmallVector<MachineBasicBlock *, 4> Blocks;
     for (auto &MBB : MF)
@@ -186,7 +191,8 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
     if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
       for (auto &Group : AllCmovGroups) {
         // Skip any group that doesn't do at least one memory operand cmov.
-        if (llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
+        if (ForceMemOperand && !ForceAll &&
+            llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
           continue;
 
         // For CMOV groups which we can rewrite and which contain a memory load,
@@ -196,6 +202,9 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
         convertCmovInstsToBranches(Group);
       }
     }
+    // Early return as ForceAll converts all CmovGroups.
+    if (ForceAll)
+      return Changed;
   }
 
   //===--------------------------------------------------------------------===//

diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index c5ebe87f9754e..776eddf10f050 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
+; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -x86-cmov-converter-force-all=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FORCEALL
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; This test checks that x86-cmov-converter optimization transform CMOV
@@ -130,6 +131,33 @@ define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocap
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: CmovInHotPath:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    testl %edi, %edi
+; CHECK-FORCEALL-NEXT:    jle .LBB0_5
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-FORCEALL-NEXT:    movl %edi, %r8d
+; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
+; CHECK-FORCEALL-NEXT:  .LBB0_2: # %for.body
+; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-FORCEALL-NEXT:    leal 1(%rax), %r9d
+; CHECK-FORCEALL-NEXT:    imull %esi, %eax
+; CHECK-FORCEALL-NEXT:    movl $10, %r10d
+; CHECK-FORCEALL-NEXT:    cmpl %edx, %eax
+; CHECK-FORCEALL-NEXT:    jg .LBB0_4
+; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %r9d, %r10d
+; CHECK-FORCEALL-NEXT:  .LBB0_4: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-FORCEALL-NEXT:    imull %r9d, %r10d
+; CHECK-FORCEALL-NEXT:    movl %r10d, (%rcx,%rdi,4)
+; CHECK-FORCEALL-NEXT:    addq $1, %rdi
+; CHECK-FORCEALL-NEXT:    cmpq %rdi, %r8
+; CHECK-FORCEALL-NEXT:    jne .LBB0_2
+; CHECK-FORCEALL-NEXT:  .LBB0_5: # %for.cond.cleanup
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 0
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -183,6 +211,37 @@ define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* no
 ; CHECK-NEXT:    jne .LBB1_2
 ; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: CmovNotInHotPath:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    testl %edi, %edi
+; CHECK-FORCEALL-NEXT:    jle .LBB1_5
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-FORCEALL-NEXT:    movl %edx, %r9d
+; CHECK-FORCEALL-NEXT:    movl %edi, %r10d
+; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
+; CHECK-FORCEALL-NEXT:  .LBB1_2: # %for.body
+; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    movl (%rcx,%rdi,4), %r11d
+; CHECK-FORCEALL-NEXT:    movl %r11d, %eax
+; CHECK-FORCEALL-NEXT:    imull %esi, %eax
+; CHECK-FORCEALL-NEXT:    movl $10, %edx
+; CHECK-FORCEALL-NEXT:    cmpl %r9d, %eax
+; CHECK-FORCEALL-NEXT:    jg .LBB1_4
+; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %r11d, %edx
+; CHECK-FORCEALL-NEXT:  .LBB1_4: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %edx, (%rcx,%rdi,4)
+; CHECK-FORCEALL-NEXT:    movl (%r8,%rdi,4), %eax
+; CHECK-FORCEALL-NEXT:    cltd
+; CHECK-FORCEALL-NEXT:    idivl %r9d
+; CHECK-FORCEALL-NEXT:    movl %eax, (%r8,%rdi,4)
+; CHECK-FORCEALL-NEXT:    addq $1, %rdi
+; CHECK-FORCEALL-NEXT:    cmpq %rdi, %r10
+; CHECK-FORCEALL-NEXT:    jne .LBB1_2
+; CHECK-FORCEALL-NEXT:  .LBB1_5: # %for.cond.cleanup
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cmp18 = icmp sgt i32 %n, 0
   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
@@ -239,6 +298,33 @@ define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
 ; CHECK-NEXT:    jne .LBB2_2
 ; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: MaxIndex:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
+; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
+; CHECK-FORCEALL-NEXT:    jl .LBB2_5
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-FORCEALL-NEXT:    movl %edi, %r8d
+; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
+; CHECK-FORCEALL-NEXT:    movl $1, %edx
+; CHECK-FORCEALL-NEXT:  .LBB2_2: # %for.body
+; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-FORCEALL-NEXT:    movslq %edi, %rcx
+; CHECK-FORCEALL-NEXT:    movl %edx, %eax
+; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-FORCEALL-NEXT:    jg .LBB2_4
+; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %edi, %eax
+; CHECK-FORCEALL-NEXT:  .LBB2_4: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-FORCEALL-NEXT:    addq $1, %rdx
+; CHECK-FORCEALL-NEXT:    movl %eax, %edi
+; CHECK-FORCEALL-NEXT:    cmpq %rdx, %r8
+; CHECK-FORCEALL-NEXT:    jne .LBB2_2
+; CHECK-FORCEALL-NEXT:  .LBB2_5: # %for.cond.cleanup
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -296,6 +382,33 @@ define i32 @MaxIndex_unpredictable(i32 %n, i32* nocapture readonly %a) #0 {
 ; CHECK-NEXT:    jne .LBB3_2
 ; CHECK-NEXT:  .LBB3_5: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
+; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
+; CHECK-FORCEALL-NEXT:    jl .LBB3_5
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-FORCEALL-NEXT:    movl %edi, %r8d
+; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
+; CHECK-FORCEALL-NEXT:    movl $1, %edx
+; CHECK-FORCEALL-NEXT:  .LBB3_2: # %for.body
+; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-FORCEALL-NEXT:    movslq %edi, %rcx
+; CHECK-FORCEALL-NEXT:    movl %edx, %eax
+; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-FORCEALL-NEXT:    jg .LBB3_4
+; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %edi, %eax
+; CHECK-FORCEALL-NEXT:  .LBB3_4: # %for.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-FORCEALL-NEXT:    addq $1, %rdx
+; CHECK-FORCEALL-NEXT:    movl %eax, %edi
+; CHECK-FORCEALL-NEXT:    cmpq %rdx, %r8
+; CHECK-FORCEALL-NEXT:    jne .LBB3_2
+; CHECK-FORCEALL-NEXT:  .LBB3_5: # %for.cond.cleanup
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -343,6 +456,33 @@ define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
 ; CHECK-NEXT:    jne .LBB4_2
 ; CHECK-NEXT:  .LBB4_3: # %for.cond.cleanup
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: MaxValue:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rsi), %ecx
+; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
+; CHECK-FORCEALL-NEXT:    jge .LBB4_3
+; CHECK-FORCEALL-NEXT:  # %bb.1:
+; CHECK-FORCEALL-NEXT:    movl %ecx, %eax
+; CHECK-FORCEALL-NEXT:  .LBB4_2: # %for.cond.cleanup
+; CHECK-FORCEALL-NEXT:    retq
+; CHECK-FORCEALL-NEXT:  .LBB4_3: # %for.body.preheader
+; CHECK-FORCEALL-NEXT:    movl %edi, %edi
+; CHECK-FORCEALL-NEXT:    movl $1, %edx
+; CHECK-FORCEALL-NEXT:  .LBB4_4: # %for.body
+; CHECK-FORCEALL-NEXT:  # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %eax
+; CHECK-FORCEALL-NEXT:    cmpl %ecx, %eax
+; CHECK-FORCEALL-NEXT:    jg .LBB4_6
+; CHECK-FORCEALL-NEXT:  # %bb.5: # %for.body
+; CHECK-FORCEALL-NEXT:  # in Loop: Header=BB4_4 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %ecx, %eax
+; CHECK-FORCEALL-NEXT:  .LBB4_6: # %for.body
+; CHECK-FORCEALL-NEXT:  # in Loop: Header=BB4_4 Depth=1
+; CHECK-FORCEALL-NEXT:    addq $1, %rdx
+; CHECK-FORCEALL-NEXT:    movl %eax, %ecx
+; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rdi
+; CHECK-FORCEALL-NEXT:    je .LBB4_2
+; CHECK-FORCEALL-NEXT:    jmp .LBB4_4
 entry:
   %0 = load i32, i32* %a, align 4
   %cmp13 = icmp sgt i32 %n, 1
@@ -387,6 +527,24 @@ define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %str
 ; CHECK-NEXT:    ja .LBB5_1
 ; CHECK-NEXT:  # %bb.3: # %while.end
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: BinarySearch:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rsi), %eax
+; CHECK-FORCEALL-NEXT:    jmp .LBB5_2
+; CHECK-FORCEALL-NEXT:  .LBB5_1: # %while.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB5_2 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %ecx, %eax
+; CHECK-FORCEALL-NEXT:    xorl %ecx, %ecx
+; CHECK-FORCEALL-NEXT:    btl %eax, %edi
+; CHECK-FORCEALL-NEXT:    setae %cl
+; CHECK-FORCEALL-NEXT:    movq 8(%rdx,%rcx,8), %rdx
+; CHECK-FORCEALL-NEXT:  .LBB5_2: # %while.body
+; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    movl (%rdx), %ecx
+; CHECK-FORCEALL-NEXT:    cmpl %ecx, %eax
+; CHECK-FORCEALL-NEXT:    ja .LBB5_1
+; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.end
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
   %0 = load i32, i32* %Val8, align 8
@@ -477,6 +635,39 @@ define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0
 ; CHECK-NEXT:    ja .LBB6_2
 ; CHECK-NEXT:  .LBB6_5: # %while.end
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: Transform:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movb $1, %al
+; CHECK-FORCEALL-NEXT:    testb %al, %al
+; CHECK-FORCEALL-NEXT:    jne .LBB6_5
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %while.body.preheader
+; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
+; CHECK-FORCEALL-NEXT:    xorl %esi, %esi
+; CHECK-FORCEALL-NEXT:  .LBB6_2: # %while.body
+; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    movslq %esi, %rsi
+; CHECK-FORCEALL-NEXT:    movl (%rdi,%rsi,4), %eax
+; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
+; CHECK-FORCEALL-NEXT:    divl %r8d
+; CHECK-FORCEALL-NEXT:    movl %eax, %edx
+; CHECK-FORCEALL-NEXT:    movl $11, %eax
+; CHECK-FORCEALL-NEXT:    movl %r8d, %ecx
+; CHECK-FORCEALL-NEXT:    cmpl %r8d, %edx
+; CHECK-FORCEALL-NEXT:    ja .LBB6_4
+; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
+; CHECK-FORCEALL-NEXT:    movl $22, %eax
+; CHECK-FORCEALL-NEXT:    movl $22, %ecx
+; CHECK-FORCEALL-NEXT:  .LBB6_4: # %while.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
+; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
+; CHECK-FORCEALL-NEXT:    divl %ecx
+; CHECK-FORCEALL-NEXT:    movl %edx, (%rdi,%rsi,4)
+; CHECK-FORCEALL-NEXT:    addl $1, %esi
+; CHECK-FORCEALL-NEXT:    cmpl %r9d, %esi
+; CHECK-FORCEALL-NEXT:    ja .LBB6_2
+; CHECK-FORCEALL-NEXT:  .LBB6_5: # %while.end
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cmp10 = icmp ugt i32 0, %n
   br i1 %cmp10, label %while.body, label %while.end
@@ -512,6 +703,15 @@ define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 ; CHECK-NEXT:    movl (%rcx), %eax
 ; CHECK-NEXT:  .LBB7_2: # %entry
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl %edx, %eax
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    ja .LBB7_2
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
+; CHECK-FORCEALL-NEXT:  .LBB7_2: # %entry
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %load = load i32, i32* %y
@@ -530,6 +730,15 @@ define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y)
 ; CHECK-NEXT:    movl (%rcx), %eax
 ; CHECK-NEXT:  .LBB8_2: # %entry
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl %edx, %eax
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    ja .LBB8_2
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
+; CHECK-FORCEALL-NEXT:  .LBB8_2: # %entry
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %load = load i32, i32* %y
@@ -554,6 +763,20 @@ define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #
 ; CHECK-NEXT:    addl %r8d, %eax
 ; CHECK-NEXT:    addl %edx, %eax
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl %edx, %eax
+; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    ja .LBB9_2
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rcx), %r8d
+; CHECK-FORCEALL-NEXT:    movl %edi, %eax
+; CHECK-FORCEALL-NEXT:    movl %esi, %edx
+; CHECK-FORCEALL-NEXT:  .LBB9_2: # %entry
+; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
+; CHECK-FORCEALL-NEXT:    addl %edx, %eax
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %y = load i32, i32* %y.ptr
@@ -581,6 +804,20 @@ define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr)
 ; CHECK-NEXT:    addl %r8d, %eax
 ; CHECK-NEXT:    addl %edx, %eax
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl %edx, %eax
+; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    jbe .LBB10_2
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rcx), %r8d
+; CHECK-FORCEALL-NEXT:    movl %edi, %eax
+; CHECK-FORCEALL-NEXT:    movl %esi, %edx
+; CHECK-FORCEALL-NEXT:  .LBB10_2: # %entry
+; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
+; CHECK-FORCEALL-NEXT:    addl %edx, %eax
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %y = load i32, i32* %y.ptr
@@ -603,6 +840,14 @@ define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y
 ; CHECK-NEXT:    cmoval (%r8), %edx
 ; CHECK-NEXT:    addl %edx, %eax
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand_conflicting_dir:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
+; CHECK-FORCEALL-NEXT:    cmoval %edx, %eax
+; CHECK-FORCEALL-NEXT:    cmoval (%r8), %edx
+; CHECK-FORCEALL-NEXT:    addl %edx, %eax
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %y1 = load i32, i32* %y1.ptr
@@ -626,6 +871,15 @@ define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x
 ; CHECK-NEXT:    movl (%rcx), %eax
 ; CHECK-NEXT:  .LBB12_2: # %entry
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl %edi, %eax
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    ja .LBB12_2
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
+; CHECK-FORCEALL-NEXT:  .LBB12_2: # %entry
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %p = select i1 %cond, i32* %x, i32* %y
@@ -647,6 +901,16 @@ define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %
 ; CHECK-NEXT:    movl (%rax), %eax
 ; CHECK-NEXT:  .LBB13_2: # %entry
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl %edi, %eax
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    ja .LBB13_2
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
+; CHECK-FORCEALL-NEXT:    movq (%rcx), %rax
+; CHECK-FORCEALL-NEXT:    movl (%rax), %eax
+; CHECK-FORCEALL-NEXT:  .LBB13_2: # %entry
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %load1 = load i32*, i32** %y
@@ -669,6 +933,15 @@ define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %
 ; CHECK-NEXT:    movl (%rcx), %eax
 ; CHECK-NEXT:  .LBB14_2: # %entry
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movl %edi, %eax
+; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
+; CHECK-FORCEALL-NEXT:    ja .LBB14_2
+; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
+; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
+; CHECK-FORCEALL-NEXT:  .LBB14_2: # %entry
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
   %p = select i1 %cond, i32* %x, i32* %y
@@ -715,6 +988,39 @@ define void @test_memoperand_loop(i32 %data) #0 {
 ; CHECK-NEXT:    jl .LBB15_1
 ; CHECK-NEXT:  # %bb.6: # %exit
 ; CHECK-NEXT:    retq
+; CHECK-FORCEALL-LABEL: test_memoperand_loop:
+; CHECK-FORCEALL:       # %bb.0: # %entry
+; CHECK-FORCEALL-NEXT:    movq begin@GOTPCREL(%rip), %r8
+; CHECK-FORCEALL-NEXT:    movq (%r8), %rax
+; CHECK-FORCEALL-NEXT:    movq end@GOTPCREL(%rip), %rcx
+; CHECK-FORCEALL-NEXT:    movq (%rcx), %rdx
+; CHECK-FORCEALL-NEXT:    xorl %esi, %esi
+; CHECK-FORCEALL-NEXT:    movq %rax, %rcx
+; CHECK-FORCEALL-NEXT:  .LBB15_1: # %loop.body
+; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-FORCEALL-NEXT:    addq $8, %rcx
+; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
+; CHECK-FORCEALL-NEXT:    ja .LBB15_3
+; CHECK-FORCEALL-NEXT:  # %bb.2: # %loop.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
+; CHECK-FORCEALL-NEXT:    movq (%r8), %rcx
+; CHECK-FORCEALL-NEXT:  .LBB15_3: # %loop.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %edi, (%rcx)
+; CHECK-FORCEALL-NEXT:    addq $8, %rcx
+; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
+; CHECK-FORCEALL-NEXT:    ja .LBB15_5
+; CHECK-FORCEALL-NEXT:  # %bb.4: # %loop.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
+; CHECK-FORCEALL-NEXT:    movq %rax, %rcx
+; CHECK-FORCEALL-NEXT:  .LBB15_5: # %loop.body
+; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
+; CHECK-FORCEALL-NEXT:    movl %edi, (%rcx)
+; CHECK-FORCEALL-NEXT:    addl $1, %esi
+; CHECK-FORCEALL-NEXT:    cmpl $1024, %esi # imm = 0x400
+; CHECK-FORCEALL-NEXT:    jl .LBB15_1
+; CHECK-FORCEALL-NEXT:  # %bb.6: # %exit
+; CHECK-FORCEALL-NEXT:    retq
 entry:
   %begin = load i32*, i32** @begin, align 8
   %end = load i32*, i32** @end, align 8


        

