[llvm] r288428 - RegisterCoalescer: Only coalesce complete reserved registers.
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 1 14:39:52 PST 2016
Author: matze
Date: Thu Dec 1 16:39:51 2016
New Revision: 288428
URL: http://llvm.org/viewvc/llvm-project?rev=288428&view=rev
Log:
RegisterCoalescer: Only coalesce complete reserved registers.
The coalescer eliminates copies from reserved registers of the form:
%vregX = COPY %rY
in the case where %rY is a reserved register. However this turns out to
be invalid if only some of the subregisters are reserved (see also
https://reviews.llvm.org/D26648).
Differential Revision: https://reviews.llvm.org/D26687
Added:
llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir
Removed:
llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir
Modified:
llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
Modified: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp?rev=288428&r1=288427&r2=288428&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Thu Dec 1 16:39:51 2016
@@ -1571,11 +1571,17 @@ bool RegisterCoalescer::joinReservedPhys
// Deny any overlapping intervals. This depends on all the reserved
// register live ranges to look like dead defs.
if (!MRI->isConstantPhysReg(DstReg)) {
- for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI)
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ // Abort if not all the regunits are reserved.
+ for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+ if (!MRI->isReserved(*RI))
+ return false;
+ }
if (RHS.overlaps(LIS->getRegUnit(*UI))) {
DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
return false;
}
+ }
}
// Skip any value computations, we are not adding new values to the
Removed: llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir?rev=288427&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir (removed)
@@ -1,31 +0,0 @@
-# RUN: llc -mtriple=aarch64-- -run-pass=simple-register-coalescing %s -o - | FileCheck %s
---- |
- define void @func() { ret void }
-...
----
-# Check that we eliminate copies to/from constant physregs regardless of
-# "interfering" reads/writes.
-# CHECK: name: func
-# CHECK-NOT: COPY
-# CHECK: STRWui %wzr, %x1
-# CHECK-NOT: COPY
-# CHECK: STRXui %xzr, %x1
-# CHECK: %wzr = SUBSWri %w1, 0, 0
-name: func
-registers:
- - { id: 0, class: gpr32 }
- - { id: 1, class: gpr64 }
- - { id: 2, class: gpr32 }
-body: |
- bb.0:
- %0 = COPY %wzr
- dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
- STRWui %0, %x1, 0
-
- %1 = COPY %xzr
- dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
- STRXui %1, %x1, 0
-
- %2 = SUBSWri %w1, 0, 0, implicit-def %nzcv
- %wzr = COPY %2
-...
Added: llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir?rev=288428&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir Thu Dec 1 16:39:51 2016
@@ -0,0 +1,67 @@
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=simple-register-coalescing %s -o - | FileCheck %s
+--- |
+ define void @func() { ret void }
+...
+---
+# Check coalescing of COPYs from reserved physregs.
+# CHECK-LABEL: name: func
+name: func
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: gpr64 }
+ - { id: 3, class: gpr32 }
+ - { id: 4, class: gpr64 }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: xseqpairsclass }
+body: |
+ bb.0:
+ ; We usually should not coalesce copies from allocatable physregs.
+ ; CHECK: %0 = COPY %w7
+ ; CHECK: STRWui %0, %x1, 0
+ %0 = COPY %w7
+ STRWui %0, %x1, 0
+
+ ; It is fine to coalesce copies from reserved physregs
+ ; CHECK-NOT: COPY
+ ; CHECK: STRXui %fp, %x1, 0
+ %1 = COPY %fp
+ STRXui %1, %x1, 0
+
+ ; It is not fine to coalesce copies from reserved physregs when they are
+ ; clobbered.
+ ; CHECK: %2 = COPY %fp
+ ; CHECK: STRXui %2, %x1, 0
+ %2 = COPY %fp
+ %fp = SUBXri %fp, 4, 0
+ STRXui %2, %x1, 0
+
+ ; It is fine to coalesce copies from constant physregs even when they are
+ ; clobbered.
+ ; CHECK-NOT: COPY
+ ; CHECK: STRWui %wzr, %x1
+ %3 = COPY %wzr
+ dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
+ STRWui %3, %x1, 0
+
+ ; It is fine to coalesce copies from constant physregs even when they are
+ ; clobbered.
+ ; CHECK-NOT: COPY
+ ; CHECK: STRXui %xzr, %x1
+ %4 = COPY %xzr
+ dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
+ STRXui %4, %x1, 0
+
+ ; Coalescing COPYs into constant physregs.
+ ; CHECK: %wzr = SUBSWri %w1, 0, 0
+ %5 = SUBSWri %w1, 0, 0, implicit-def %nzcv
+ %wzr = COPY %5
+
+ ; Only coalesce when the source register is reserved as a whole (this is
+ ; a limitation of the current code which cannot update liveness information
+ ; of the non-reserved part).
+ ; CHECK: %6 = COPY %xzr_x0
+ ; CHECK: HINT 0, implicit %6
+ %6 = COPY %xzr_x0
+ HINT 0, implicit %6
+...
Modified: llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll?rev=288428&r1=288427&r2=288428&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll Thu Dec 1 16:39:51 2016
@@ -78,40 +78,41 @@ define void @max_12_sgprs_14_input_sgprs
ret void
}
-; ALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
+; The following test is commented out for now; http://llvm.org/PR31230
+; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
; ; Make sure copies for input buffer are not clobbered. This requires
; ; swapping the order the registers are copied from what normally
; ; happens.
-; TOSMEM: s_mov_b32 s5, s11
-; TOSMEM: s_add_u32 m0, s5,
-; TOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
+; XTOSMEM: s_mov_b32 s5, s11
+; XTOSMEM: s_add_u32 m0, s5,
+; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
-; ALL: SGPRBlocks: 2
-; ALL: NumSGPRsForWavesPerEU: 18
-define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
- i32 addrspace(1)* %out2,
- i32 addrspace(1)* %out3,
- i32 addrspace(1)* %out4,
- i32 %one, i32 %two, i32 %three, i32 %four) #2 {
- store volatile i32 0, i32* undef
- %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
- store volatile i32 %x.0, i32 addrspace(1)* undef
- %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
- store volatile i32 %x.0, i32 addrspace(1)* undef
- %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
- store volatile i32 %x.0, i32 addrspace(1)* undef
- %x.3 = call i64 @llvm.amdgcn.dispatch.id()
- store volatile i64 %x.3, i64 addrspace(1)* undef
- %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
- store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
-
- store i32 %one, i32 addrspace(1)* %out1
- store i32 %two, i32 addrspace(1)* %out2
- store i32 %three, i32 addrspace(1)* %out3
- store i32 %four, i32 addrspace(1)* %out4
- ret void
-}
+; XALL: SGPRBlocks: 2
+; XALL: NumSGPRsForWavesPerEU: 18
+;define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
+; i32 addrspace(1)* %out2,
+; i32 addrspace(1)* %out3,
+; i32 addrspace(1)* %out4,
+; i32 %one, i32 %two, i32 %three, i32 %four) #2 {
+; store volatile i32 0, i32* undef
+; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
+; store volatile i32 %x.0, i32 addrspace(1)* undef
+; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
+; store volatile i32 %x.0, i32 addrspace(1)* undef
+; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
+; store volatile i32 %x.0, i32 addrspace(1)* undef
+; %x.3 = call i64 @llvm.amdgcn.dispatch.id()
+; store volatile i64 %x.3, i64 addrspace(1)* undef
+; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+; store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
+;
+; store i32 %one, i32 addrspace(1)* %out1
+; store i32 %two, i32 addrspace(1)* %out2
+; store i32 %three, i32 addrspace(1)* %out3
+; store i32 %four, i32 addrspace(1)* %out4
+; ret void
+;}
declare i32 @llvm.amdgcn.workgroup.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.y() #1
More information about the llvm-commits
mailing list