[llvm] r291716 - AMDGPU: Fix breaking VOP3 v_add_i32s

Wed Jan 11 14:35:17 PST 2017

Author: arsenm
Date: Wed Jan 11 16:35:17 2017
New Revision: 291716

URL: http://llvm.org/viewvc/llvm-project?rev=291716&view=rev
Log:
AMDGPU: Fix breaking VOP3 v_add_i32s

The pass was shrinking the instruction even though the carry output
register was a virtual register that was not known to be VCC.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=291716&r1=291715&r2=291716&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Wed Jan 11 16:35:17 2017
@@ -84,7 +84,7 @@ static bool canShrink(MachineInstr &MI,
   // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
   // a special case for it.  It can only be shrunk if the third operand
   // is vcc.  We should handle this the same way we handle vopc, by addding
-  // a register allocation hint pre-regalloc and then do the shrining
+  // a register allocation hint pre-regalloc and then do the shrinking
   // post-regalloc.
   if (Src2) {
     switch (MI.getOpcode()) {
@@ -456,6 +456,16 @@ bool SIShrinkInstructions::runOnMachineF
           continue;
       }
 
+      // Check for the bool flag output for instructions like V_ADD_I32_e64.
+      const MachineOperand *SDst = TII->getNamedOperand(MI,
+                                                        AMDGPU::OpName::sdst);
+      if (SDst && SDst->getReg() != AMDGPU::VCC) {
+        if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
+          MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
+
+        continue;
+      }
+
       // We can shrink this instruction
       DEBUG(dbgs() << "Shrinking " << MI);
 

Added: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir?rev=291716&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir Wed Jan 11 16:35:17 2017
@@ -0,0 +1,305 @@
+# RUN: llc -verify-machineinstrs -march=amdgcn -run-pass si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
+# Check that add with carry out isn't incorrectly reduced to e32 when
+# the carry out is a virtual register.
+
+# TODO: We should run this test until the end of codegen to make sure
+# that the post-RA run does manage to shrink it, but right now
+# resuming codegen from this MIR crashes.
+
+--- |
+  define void @shrink_add_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+    %tid = call i32 @llvm.amdgcn.workitem.id.x()
+    %tid.ext = sext i32 %tid to i64
+    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+    %a = load volatile i32, i32 addrspace(1)* %a.ptr
+    %b = load volatile i32, i32 addrspace(1)* %b.ptr
+    %result = add i32 %a, %b
+    store volatile i32 %result, i32 addrspace(1)* %out.gep
+    ret void
+  }
+
+  define void @shrink_sub_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+    %tid = call i32 @llvm.amdgcn.workitem.id.x()
+    %tid.ext = sext i32 %tid to i64
+    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+    %a = load volatile i32, i32 addrspace(1)* %a.ptr
+    %b = load volatile i32, i32 addrspace(1)* %b.ptr
+    %result = sub i32 %a, %b
+    store volatile i32 %result, i32 addrspace(1)* %out.gep
+    ret void
+  }
+
+  define void @shrink_subrev_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+    %tid = call i32 @llvm.amdgcn.workitem.id.x()
+    %tid.ext = sext i32 %tid to i64
+    %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+    %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+    %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+    %a = load volatile i32, i32 addrspace(1)* %a.ptr
+    %b = load volatile i32, i32 addrspace(1)* %b.ptr
+    %result = sub i32 %a, %b
+    store volatile i32 %result, i32 addrspace(1)* %out.gep
+    ret void
+  }
+
+  declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+  attributes #0 = { nounwind }
+  attributes #1 = { nounwind readnone }
+
+...
+---
+# GCN-LABEL: name: shrink_add_vop3{{$}}
+# GCN: %29, %9 = V_ADD_I32_e64 %19, %17, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+name:            shrink_add_vop3
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_32 }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32_xm0 }
+  - { id: 9, class: sreg_64 }
+  - { id: 10, class: sreg_32_xm0 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sgpr_64 }
+  - { id: 13, class: sgpr_128 }
+  - { id: 14, class: sreg_32_xm0 }
+  - { id: 15, class: sreg_64 }
+  - { id: 16, class: sgpr_128 }
+  - { id: 17, class: vgpr_32 }
+  - { id: 18, class: vreg_64 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: sreg_32_xm0 }
+  - { id: 22, class: sreg_32 }
+  - { id: 23, class: sreg_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vreg_64 }
+  - { id: 26, class: vgpr_32 }
+  - { id: 27, class: vreg_64 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vgpr_32 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %27 = REG_SEQUENCE %3, 1, %26, 2
+    %10 = S_MOV_B32 61440
+    %11 = S_MOV_B32 0
+    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+    %13 = REG_SEQUENCE killed %5, 17, %12, 18
+    %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+    %16 = REG_SEQUENCE killed %4, 17, %12, 18
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %29, %9 = V_ADD_I32_e64 %19, %17, implicit %exec
+    %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: shrink_sub_vop3{{$}}
+# GCN: %29, %9 = V_SUB_I32_e64 %19, %17, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+
+name:            shrink_sub_vop3
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_32 }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32_xm0 }
+  - { id: 9, class: sreg_64 }
+  - { id: 10, class: sreg_32_xm0 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sgpr_64 }
+  - { id: 13, class: sgpr_128 }
+  - { id: 14, class: sreg_32_xm0 }
+  - { id: 15, class: sreg_64 }
+  - { id: 16, class: sgpr_128 }
+  - { id: 17, class: vgpr_32 }
+  - { id: 18, class: vreg_64 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: sreg_32_xm0 }
+  - { id: 22, class: sreg_32 }
+  - { id: 23, class: sreg_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vreg_64 }
+  - { id: 26, class: vgpr_32 }
+  - { id: 27, class: vreg_64 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vgpr_32 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %27 = REG_SEQUENCE %3, 1, %26, 2
+    %10 = S_MOV_B32 61440
+    %11 = S_MOV_B32 0
+    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+    %13 = REG_SEQUENCE killed %5, 17, %12, 18
+    %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+    %16 = REG_SEQUENCE killed %4, 17, %12, 18
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %29, %9 = V_SUB_I32_e64 %19, %17, implicit %exec
+    %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+    BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: shrink_subrev_vop3{{$}}
+# GCN: %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+
+name:            shrink_subrev_vop3
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_32 }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32_xm0 }
+  - { id: 9, class: sreg_64 }
+  - { id: 10, class: sreg_32_xm0 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sgpr_64 }
+  - { id: 13, class: sgpr_128 }
+  - { id: 14, class: sreg_32_xm0 }
+  - { id: 15, class: sreg_64 }
+  - { id: 16, class: sgpr_128 }
+  - { id: 17, class: vgpr_32 }
+  - { id: 18, class: vreg_64 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: sreg_32_xm0 }
+  - { id: 22, class: sreg_32 }
+  - { id: 23, class: sreg_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vreg_64 }
+  - { id: 26, class: vgpr_32 }
+  - { id: 27, class: vreg_64 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vgpr_32 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %27 = REG_SEQUENCE %3, 1, %26, 2
+    %10 = S_MOV_B32 61440
+    %11 = S_MOV_B32 0
+    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+    %13 = REG_SEQUENCE killed %5, 17, %12, 18
+    %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+    %16 = REG_SEQUENCE killed %4, 17, %12, 18
+    %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+    %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit %exec
+    %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+    BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+    S_ENDPGM
+
+...