[llvm] r361156 - RegAlloc: Fix verifier error with undef identity copies

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon May 20 07:09:37 PDT 2019


Author: arsenm
Date: Mon May 20 07:09:36 2019
New Revision: 361156

URL: http://llvm.org/viewvc/llvm-project?rev=361156&view=rev
Log:
RegAlloc: Fix verifier error with undef identity copies

The code did not match the example in the comment: it checked the
undef flag on the copy's dest operand instead of its source operand.
The existing tests only exercised the > 2 operands case.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
Modified:
    llvm/trunk/lib/CodeGen/VirtRegMap.cpp

Modified: llvm/trunk/lib/CodeGen/VirtRegMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/VirtRegMap.cpp?rev=361156&r1=361155&r2=361156&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/VirtRegMap.cpp (original)
+++ llvm/trunk/lib/CodeGen/VirtRegMap.cpp Mon May 20 07:09:36 2019
@@ -384,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy
   // give us additional liveness information: The target (super-)register
   // must not be valid before this point. Replace the COPY with a KILL
   // instruction to maintain this information.
-  if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
     MI.setDesc(TII->get(TargetOpcode::KILL));
     LLVM_DEBUG(dbgs() << "  replace by: " << MI);
     return;

Added: llvm/trunk/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir?rev=361156&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir Mon May 20 07:09:36 2019
@@ -0,0 +1,69 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=greedy -stop-after=virtregrewriter -verify-machineinstrs -o - %s | FileCheck %s
+
+# The undef copy of %4 is allocated to $vgpr3, which makes it an
+# identity copy. The identity copy was then deleted, and $vgpr3 was
+# considered undef. The code to replace the undef copy with a kill
+# was incorrectly checking the dest operand, rather than the source.
+
+--- |
+  define amdgpu_kernel void @undef_identity_copy() {
+    ret void
+  }
+
+  declare hidden float @bar(<4 x float>)
+  declare hidden void @foo()
+
+...
+---
+name:            undef_identity_copy
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  hasCalls:        true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  scratchWaveOffsetReg: '$sgpr95'
+  frameOffsetReg:  '$sgpr95'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: undef_identity_copy
+    ; CHECK: renamable $vgpr32_vgpr33_vgpr34_vgpr35 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
+    ; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    ; CHECK: $sgpr4 = COPY $sgpr95
+    ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
+    ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    ; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    ; CHECK: $sgpr4 = COPY $sgpr95
+    ; CHECK: $vgpr0 = COPY renamable $vgpr32
+    ; CHECK: $vgpr1 = COPY renamable $vgpr33
+    ; CHECK: $vgpr2 = COPY renamable $vgpr34
+    ; CHECK: $vgpr3 = KILL undef renamable $vgpr3
+    ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
+    ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    ; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+    ; CHECK: S_ENDPGM 0
+    %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
+    %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
+    ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    $sgpr4 = COPY $sgpr95
+    dead $sgpr30_sgpr31 = SI_CALL %2, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
+    ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
+    ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    $sgpr4 = COPY $sgpr95
+    $vgpr0 = COPY %0.sub0
+    $vgpr1 = COPY %0.sub1
+    $vgpr2 = COPY %0.sub2
+    $vgpr3 = COPY undef %4:vgpr_32
+    dead $sgpr30_sgpr31 = SI_CALL %3, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
+    %5:vgpr_32 = COPY $vgpr0
+    ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
+    FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+    S_ENDPGM 0
+
+...




More information about the llvm-commits mailing list