[llvm] r348330 - [MachineLICM][X86][AMDGPU] Fix subtle bug in the updating of PhysRegClobbers in post-RA LICM

Tue Dec 4 19:41:26 PST 2018

Author: ctopper
Date: Tue Dec  4 19:41:26 2018
New Revision: 348330

URL: http://llvm.org/viewvc/llvm-project?rev=348330&view=rev
Log:
[MachineLICM][X86][AMDGPU] Fix subtle bug in the updating of PhysRegClobbers in post-RA LICM

It looks like MCRegAliasIterator can visit the same physical register twice. When this happens in this code in LICM we end up setting the PhysRegDef and then later in the same loop visit the register again. Now we see that PhysRegDef is set from the earlier iteration so now set PhysRegClobber.

This patch splits the loop so we have one that uses the previous value of PhysRegDef to update PhysRegClobber and second loop that updates PhysRegDef.

The X86 atomic test is an improvement. I had to add sideeffect to the two shrink wrapping tests to prevent hoisting from occurring. I'm not sure about the AMDGPU tests. It looks like the branch instruction changed at end the of the loops. And in the branch-relaxation test I think there is now "and vcc, exec, -1" instruction that wasn't there before.

Differential Revision: https://reviews.llvm.org/D55102

Modified:
    llvm/trunk/lib/CodeGen/MachineLICM.cpp
    llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
    llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll
    llvm/trunk/test/CodeGen/X86/atomic_mi.ll
    llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll

Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=348330&r1=348329&r2=348330&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Tue Dec  4 19:41:26 2018
@@ -463,8 +463,12 @@ void MachineLICMBase::ProcessMI(MachineI
     for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
       if (PhysRegDefs.test(*AS))
         PhysRegClobbers.set(*AS);
-      PhysRegDefs.set(*AS);
     }
+    // Need a second loop because MCRegAliasIterator can visit the same
+    // register twice.
+    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
+      PhysRegDefs.set(*AS);
+
     if (PhysRegClobbers.test(Reg))
       // MI defined register is seen defined by another instruction in
       // the loop, it cannot be a LICM candidate.

Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll?rev=348330&r1=348329&r2=348330&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll Tue Dec  4 19:41:26 2018
@@ -444,7 +444,7 @@ endif:
 ; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]]
 ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
 
-; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop
+; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}}
 ; GCN: ;;#ASMSTART
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
@@ -453,7 +453,7 @@ endif:
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
 ; GCN: ;;#ASMEND
-; GCN: s_cbranch_execz [[RET]]
+; GCN: s_cbranch_vccz [[RET]]
 
 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
 ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1

Modified: llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll?rev=348330&r1=348329&r2=348330&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll Tue Dec  4 19:41:26 2018
@@ -32,11 +32,11 @@ loop:
 ; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
 
 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
-; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
 ; SI: s_and_b64 vcc, exec, -1
+; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
 ; SI: s_waitcnt lgkmcnt(0)
 ; SI: buffer_store_dword [[REG]]
-; SI: s_cbranch_execnz [[LOOP]]
+; SI: s_cbranch_vccnz [[LOOP]]
 
 ; SI: [[RET]]:  ; %UnifiedReturnBlock
 ; SI: s_endpgm

Modified: llvm/trunk/test/CodeGen/X86/atomic_mi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic_mi.ll?rev=348330&r1=348329&r2=348330&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic_mi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic_mi.ll Tue Dec  4 19:41:26 2018
@@ -93,11 +93,11 @@ define void @store_atomic_imm_64(i64* %p
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
+; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    movl $42, %ebx
 ; X32-NEXT:    .p2align 4, 0x90
 ; X32-NEXT:  .LBB3_1: # %atomicrmw.start
 ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    movl $42, %ebx
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB3_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
@@ -132,11 +132,11 @@ define void @store_atomic_imm_64_big(i64
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
+; X32-NEXT:    movl $23, %ecx
+; X32-NEXT:    movl $1215752192, %ebx # imm = 0x4876E800
 ; X32-NEXT:    .p2align 4, 0x90
 ; X32-NEXT:  .LBB4_1: # %atomicrmw.start
 ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    movl $23, %ecx
-; X32-NEXT:    movl $1215752192, %ebx # imm = 0x4876E800
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB4_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
@@ -753,10 +753,10 @@ define void @and_64i(i64* %p) {
 ; X32-NEXT:    andl $2, %ebx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    .p2align 4, 0x90
 ; X32-NEXT:  .LBB31_1: # %atomicrmw.start
 ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB31_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end

Modified: llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll?rev=348330&r1=348329&r2=348330&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll Tue Dec  4 19:41:26 2018
@@ -126,7 +126,7 @@ for.preheader:
 for.body:                                         ; preds = %entry, %for.body
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
   %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10
@@ -178,7 +178,7 @@ for.preheader:
 for.body:                                         ; preds = %for.body, %entry
   %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
   %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.03
   %inc = add nuw nsw i32 %i.04, 1
   %exitcond = icmp eq i32 %inc, 10
@@ -248,7 +248,7 @@ for.preheader:
 for.body:                                         ; preds = %entry, %for.body
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
   %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10
@@ -324,7 +324,7 @@ if.then:
 for.body:                                         ; preds = %for.body, %if.then
   %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
   %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10

Modified: llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll?rev=348330&r1=348329&r2=348330&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-win64-shrink-wrapping.ll Tue Dec  4 19:41:26 2018
@@ -100,7 +100,7 @@ for.preheader:
 for.body:                                         ; preds = %for.body, %for.preheader
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
   %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
   %add = add nsw i32 %call, %sum.04
   %inc = add nuw nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, 10