[llvm] r291720 - AMDGPU: Fix shrinking of addc/subb.
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 11 14:58:13 PST 2017
Author: arsenm
Date: Wed Jan 11 16:58:12 2017
New Revision: 291720
URL: http://llvm.org/viewvc/llvm-project?rev=291720&view=rev
Log:
AMDGPU: Fix shrinking of addc/subb.
To shrink addc/subb to VOP2, the input carry (src2) must also be VCC, not just the carry-out (sdst).
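
For context: the 32-bit VOP2 (e32) encodings of v_addc_u32 and v_subb_u32 take their carry-in implicitly from VCC, while the 64-bit VOP3 (e64) encodings carry it in an explicit src2 operand (and the carry-out in sdst). The e64-to-e32 shrink is therefore only legal when both explicit carry operands are already VCC. A minimal standalone C++ sketch of that legality rule, using plain structs rather than the real MachineInstr/MachineOperand API (all names below are illustrative):

    #include <cstdio>
    #include <string>

    // Stand-in for the explicit carry operands of V_ADDC_U32_e64 /
    // V_SUBB_U32_e64; the real pass fetches them with
    // TII->getNamedOperand(MI, AMDGPU::OpName::sdst / ::src2).
    struct CarryOperands {
      std::string SDst; // explicit carry-out
      std::string Src2; // explicit carry-in
    };

    // The e32 form has no explicit carry operands: it implicitly defines
    // and uses VCC, so shrinking is only valid when both explicit carry
    // operands of the e64 form are already VCC.
    static bool canShrinkCarryToVOP2(const CarryOperands &Ops) {
      return Ops.SDst == "vcc" && Ops.Src2 == "vcc";
    }

    int main() {
      CarryOperands Shrinkable{"vcc", "vcc"}; // like shrink_addc_vop3 below
      CarryOperands SGPRCarry{"vcc", "%9"};   // like check_addc_src2_vop3 below
      std::printf("carry in vcc:  %s\n",
                  canShrinkCarryToVOP2(Shrinkable) ? "shrink to e32" : "keep e64");
      std::printf("carry in sgpr: %s\n",
                  canShrinkCarryToVOP2(SGPRCarry) ? "shrink to e32" : "keep e64");
      return 0;
    }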
Modified:
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=291720&r1=291719&r2=291720&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Wed Jan 11 16:58:12 2017
@@ -90,6 +90,11 @@ static bool canShrink(MachineInstr &MI,
switch (MI.getOpcode()) {
default: return false;
+ case AMDGPU::V_ADDC_U32_e64:
+ case AMDGPU::V_SUBB_U32_e64:
+ // Additional verification is needed for sdst/src2.
+ return true;
+
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_F16_e64:
if (!isVGPR(Src2, TRI, MRI) ||
@@ -174,7 +179,7 @@ static void copyFlagsToImplicitVCC(Machi
const MachineOperand &Orig) {
for (MachineOperand &Use : MI.implicit_operands()) {
- if (Use.getReg() == AMDGPU::VCC) {
+ if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
Use.setIsUndef(Orig.isUndef());
Use.setIsKill(Orig.isKill());
return;
@@ -459,11 +464,26 @@ bool SIShrinkInstructions::runOnMachineF
// Check for the bool flag output for instructions like V_ADD_I32_e64.
const MachineOperand *SDst = TII->getNamedOperand(MI,
AMDGPU::OpName::sdst);
- if (SDst && SDst->getReg() != AMDGPU::VCC) {
- if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
- MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
- continue;
+ // Check the carry-in operand for v_addc_u32_e64.
+ const MachineOperand *Src2 = TII->getNamedOperand(MI,
+ AMDGPU::OpName::src2);
+
+ if (SDst) {
+ if (SDst->getReg() != AMDGPU::VCC) {
+ if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
+ MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
+ continue;
+ }
+
+ // All of the instructions with carry outs also have an SGPR input in
+ // src2.
+ if (Src2 && Src2->getReg() != AMDGPU::VCC) {
+ if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
+ MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
+
+ continue;
+ }
}
// We can shrink this instruction
@@ -491,8 +511,6 @@ bool SIShrinkInstructions::runOnMachineF
if (Src1)
Inst32.addOperand(*Src1);
- const MachineOperand *Src2 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2);
if (Src2) {
int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
if (Op32Src2Idx != -1) {
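
When a carry operand is not yet VCC but is still a virtual register, the hunk above does not shrink; instead it records an allocation hint so the register allocator may place the value in VCC, after which the shrink becomes legal. A toy standalone sketch of that hint-then-shrink shape (the map-based "allocator" here is purely illustrative; the real call is MRI.setRegAllocationHint(Reg, 0, AMDGPU::VCC)):

    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
      // Toy model: virtual registers are strings; a hint asks the
      // allocator to try to place one in VCC.
      std::map<std::string, std::string> Hints, Assignment;
      std::string CarryIn = "%9"; // virtual register holding the carry-in

      // Carry-in is not VCC: record a hint and leave the e64 form alone.
      if (CarryIn != "vcc") {
        Hints[CarryIn] = "vcc";
        std::printf("keep V_ADDC_U32_e64, hint %s -> vcc\n", CarryIn.c_str());
      }

      // Toy allocation: honor the hint when possible (assume VCC is free).
      Assignment[CarryIn] = Hints.count(CarryIn) ? Hints[CarryIn] : "sgpr pair";

      // Once the carry actually lives in VCC, the e32 form is attainable.
      if (Assignment[CarryIn] == "vcc")
        std::printf("shrink V_ADDC_U32_e64 -> V_ADDC_U32_e32\n");
      return 0;
    }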
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir?rev=291720&r1=291719&r2=291720&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir Wed Jan 11 16:58:12 2017
@@ -46,6 +46,45 @@
ret void
}
+ define void @check_addc_src2_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+ %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile i32, i32 addrspace(1)* %a.ptr
+ %b = load volatile i32, i32 addrspace(1)* %b.ptr
+ %result = add i32 %a, %b
+ store volatile i32 %result, i32 addrspace(1)* %out.gep
+ ret void
+ }
+
+ define void @shrink_addc_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+ %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile i32, i32 addrspace(1)* %a.ptr
+ %b = load volatile i32, i32 addrspace(1)* %b.ptr
+ %result = add i32 %a, %b
+ store volatile i32 %result, i32 addrspace(1)* %out.gep
+ ret void
+ }
+
+ define void @shrink_addc_undef_vcc(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+ %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile i32, i32 addrspace(1)* %a.ptr
+ %b = load volatile i32, i32 addrspace(1)* %b.ptr
+ %result = add i32 %a, %b
+ store volatile i32 %result, i32 addrspace(1)* %out.gep
+ ret void
+ }
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
@@ -303,3 +342,256 @@ body: |
S_ENDPGM
...
+---
+# GCN-LABEL: name: check_addc_src2_vop3{{$}}
+# GCN: %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+name: check_addc_src2_vop3
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_64_xexec }
+ - { id: 6, class: sreg_32 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_64 }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sgpr_64 }
+ - { id: 13, class: sgpr_128 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_64 }
+ - { id: 16, class: sgpr_128 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32 }
+ - { id: 23, class: sreg_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vreg_64 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vreg_64 }
+ - { id: 28, class: vreg_64 }
+ - { id: 29, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %3 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+ %27 = REG_SEQUENCE %3, 1, %26, 2
+ %10 = S_MOV_B32 61440
+ %11 = S_MOV_B32 0
+ %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+ %13 = REG_SEQUENCE killed %5, 17, %12, 18
+ %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+ %16 = REG_SEQUENCE killed %4, 17, %12, 18
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+ %9 = S_MOV_B64 0
+ %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
+ %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+ S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: shrink_addc_vop3{{$}}
+# GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit %vcc, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+
+name: shrink_addc_vop3
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_64_xexec }
+ - { id: 6, class: sreg_32 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_64 }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sgpr_64 }
+ - { id: 13, class: sgpr_128 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_64 }
+ - { id: 16, class: sgpr_128 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32 }
+ - { id: 23, class: sreg_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vreg_64 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vreg_64 }
+ - { id: 28, class: vreg_64 }
+ - { id: 29, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %3 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+ %27 = REG_SEQUENCE %3, 1, %26, 2
+ %10 = S_MOV_B32 61440
+ %11 = S_MOV_B32 0
+ %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+ %13 = REG_SEQUENCE killed %5, 17, %12, 18
+ %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+ %16 = REG_SEQUENCE killed %4, 17, %12, 18
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+ %vcc = S_MOV_B64 0
+ %29, %vcc = V_ADDC_U32_e64 %19, %17, %vcc, implicit %exec
+ %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+ S_ENDPGM
+
+...
+
+---
+# GCN-LABEL: name: shrink_addc_undef_vcc{{$}}
+# GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit undef %vcc, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+name: shrink_addc_undef_vcc
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_64_xexec }
+ - { id: 6, class: sreg_32 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_64 }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sgpr_64 }
+ - { id: 13, class: sgpr_128 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_64 }
+ - { id: 16, class: sgpr_128 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32 }
+ - { id: 23, class: sreg_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vreg_64 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vreg_64 }
+ - { id: 28, class: vreg_64 }
+ - { id: 29, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %3 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+ %27 = REG_SEQUENCE %3, 1, %26, 2
+ %10 = S_MOV_B32 61440
+ %11 = S_MOV_B32 0
+ %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+ %13 = REG_SEQUENCE killed %5, 17, %12, 18
+ %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+ %16 = REG_SEQUENCE killed %4, 17, %12, 18
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+ %29, %vcc = V_ADDC_U32_e64 %19, %17, undef %vcc, implicit %exec
+ %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+ S_ENDPGM
+
+...