[llvm] r291720 - AMDGPU: Fix shrinking of addc/subb.
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 11 14:58:13 PST 2017
Author: arsenm
Date: Wed Jan 11 16:58:12 2017
New Revision: 291720
URL: http://llvm.org/viewvc/llvm-project?rev=291720&view=rev
Log:
AMDGPU: Fix shrinking of addc/subb.
To shrink addc/subb to VOP2, the input carry (src2) must also be VCC, not just the carry-out (sdst).
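
For context: the 32-bit VOP2 (e32) encodings of v_addc_u32 and v_subb_u32 take their carry-in implicitly from VCC, while the 64-bit VOP3 (e64) encodings carry it in an explicit src2 operand (and the carry-out in sdst). The e64-to-e32 shrink is therefore only legal when both explicit carry operands are already VCC. A minimal standalone C++ sketch of that legality rule, using plain structs rather than the real MachineInstr/MachineOperand API (all names below are illustrative):

    #include <cstdio>
    #include <string>

    // Stand-in for the explicit carry operands of V_ADDC_U32_e64 /
    // V_SUBB_U32_e64; the real pass fetches them with
    // TII->getNamedOperand(MI, AMDGPU::OpName::sdst / ::src2).
    struct CarryOperands {
      std::string SDst; // explicit carry-out
      std::string Src2; // explicit carry-in
    };

    // The e32 form has no explicit carry operands: it implicitly defines
    // and uses VCC, so shrinking is only valid when both explicit carry
    // operands of the e64 form are already VCC.
    static bool canShrinkCarryToVOP2(const CarryOperands &Ops) {
      return Ops.SDst == "vcc" && Ops.Src2 == "vcc";
    }

    int main() {
      CarryOperands Shrinkable{"vcc", "vcc"}; // like shrink_addc_vop3 below
      CarryOperands SGPRCarry{"vcc", "%9"};   // like check_addc_src2_vop3 below
      std::printf("carry in vcc:  %s\n",
                  canShrinkCarryToVOP2(Shrinkable) ? "shrink to e32" : "keep e64");
      std::printf("carry in sgpr: %s\n",
                  canShrinkCarryToVOP2(SGPRCarry) ? "shrink to e32" : "keep e64");
      return 0;
    }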
Modified:
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=291720&r1=291719&r2=291720&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Wed Jan 11 16:58:12 2017
@@ -90,6 +90,11 @@ static bool canShrink(MachineInstr &MI,
switch (MI.getOpcode()) {
default: return false;
+ case AMDGPU::V_ADDC_U32_e64:
+ case AMDGPU::V_SUBB_U32_e64:
+ // Additional verification is needed for sdst/src2.
+ return true;
+
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_F16_e64:
if (!isVGPR(Src2, TRI, MRI) ||
@@ -174,7 +179,7 @@ static void copyFlagsToImplicitVCC(Machi
const MachineOperand &Orig) {
for (MachineOperand &Use : MI.implicit_operands()) {
- if (Use.getReg() == AMDGPU::VCC) {
+ if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
Use.setIsUndef(Orig.isUndef());
Use.setIsKill(Orig.isKill());
return;
@@ -459,11 +464,26 @@ bool SIShrinkInstructions::runOnMachineF
// Check for the bool flag output for instructions like V_ADD_I32_e64.
const MachineOperand *SDst = TII->getNamedOperand(MI,
AMDGPU::OpName::sdst);
- if (SDst && SDst->getReg() != AMDGPU::VCC) {
- if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
- MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
- continue;
+ // Check the carry-in operand for v_addc_u32_e64.
+ const MachineOperand *Src2 = TII->getNamedOperand(MI,
+ AMDGPU::OpName::src2);
+
+ if (SDst) {
+ if (SDst->getReg() != AMDGPU::VCC) {
+ if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
+ MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
+ continue;
+ }
+
+ // All of the instructions with carry outs also have an SGPR input in
+ // src2.
+ if (Src2 && Src2->getReg() != AMDGPU::VCC) {
+ if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
+ MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
+
+ continue;
+ }
}
// We can shrink this instruction
@@ -491,8 +511,6 @@ bool SIShrinkInstructions::runOnMachineF
if (Src1)
Inst32.addOperand(*Src1);
- const MachineOperand *Src2 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2);
if (Src2) {
int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
if (Op32Src2Idx != -1) {
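
When a carry operand is not yet VCC but is still a virtual register, the hunk above does not shrink; instead it records an allocation hint so the register allocator may place the value in VCC, after which the shrink becomes legal. A toy standalone sketch of that hint-then-shrink shape (the map-based "allocator" here is purely illustrative; the real call is MRI.setRegAllocationHint(Reg, 0, AMDGPU::VCC)):

    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
      // Toy model: virtual registers are strings; a hint asks the
      // allocator to try to place one in VCC.
      std::map<std::string, std::string> Hints, Assignment;
      std::string CarryIn = "%9"; // virtual register holding the carry-in

      // Carry-in is not VCC: record a hint and leave the e64 form alone.
      if (CarryIn != "vcc") {
        Hints[CarryIn] = "vcc";
        std::printf("keep V_ADDC_U32_e64, hint %s -> vcc\n", CarryIn.c_str());
      }

      // Toy allocation: honor the hint when possible (assume VCC is free).
      Assignment[CarryIn] = Hints.count(CarryIn) ? Hints[CarryIn] : "sgpr pair";

      // Once the carry actually lives in VCC, the e32 form is attainable.
      if (Assignment[CarryIn] == "vcc")
        std::printf("shrink V_ADDC_U32_e64 -> V_ADDC_U32_e32\n");
      return 0;
    }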
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir?rev=291720&r1=291719&r2=291720&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir Wed Jan 11 16:58:12 2017
@@ -46,6 +46,45 @@
ret void
}
+ define void @check_addc_src2_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+ %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile i32, i32 addrspace(1)* %a.ptr
+ %b = load volatile i32, i32 addrspace(1)* %b.ptr
+ %result = add i32 %a, %b
+ store volatile i32 %result, i32 addrspace(1)* %out.gep
+ ret void
+ }
+
+ define void @shrink_addc_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+ %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile i32, i32 addrspace(1)* %a.ptr
+ %b = load volatile i32, i32 addrspace(1)* %b.ptr
+ %result = add i32 %a, %b
+ store volatile i32 %result, i32 addrspace(1)* %out.gep
+ ret void
+ }
+
+ define void @shrink_addc_undef_vcc(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
+ %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile i32, i32 addrspace(1)* %a.ptr
+ %b = load volatile i32, i32 addrspace(1)* %b.ptr
+ %result = add i32 %a, %b
+ store volatile i32 %result, i32 addrspace(1)* %out.gep
+ ret void
+ }
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
@@ -303,3 +342,256 @@ body: |
S_ENDPGM
...
+---
+# GCN-LABEL: name: check_addc_src2_vop3{{$}}
+# GCN: %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+name: check_addc_src2_vop3
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_64_xexec }
+ - { id: 6, class: sreg_32 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_64 }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sgpr_64 }
+ - { id: 13, class: sgpr_128 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_64 }
+ - { id: 16, class: sgpr_128 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32 }
+ - { id: 23, class: sreg_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vreg_64 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vreg_64 }
+ - { id: 28, class: vreg_64 }
+ - { id: 29, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %3 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+ %27 = REG_SEQUENCE %3, 1, %26, 2
+ %10 = S_MOV_B32 61440
+ %11 = S_MOV_B32 0
+ %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+ %13 = REG_SEQUENCE killed %5, 17, %12, 18
+ %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+ %16 = REG_SEQUENCE killed %4, 17, %12, 18
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+ %9 = S_MOV_B64 0
+ %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
+ %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+ S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: shrink_addc_vop3{{$}}
+# GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit %vcc, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+
+name: shrink_addc_vop3
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_64_xexec }
+ - { id: 6, class: sreg_32 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_64 }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sgpr_64 }
+ - { id: 13, class: sgpr_128 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_64 }
+ - { id: 16, class: sgpr_128 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32 }
+ - { id: 23, class: sreg_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vreg_64 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vreg_64 }
+ - { id: 28, class: vreg_64 }
+ - { id: 29, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %3 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+ %27 = REG_SEQUENCE %3, 1, %26, 2
+ %10 = S_MOV_B32 61440
+ %11 = S_MOV_B32 0
+ %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+ %13 = REG_SEQUENCE killed %5, 17, %12, 18
+ %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+ %16 = REG_SEQUENCE killed %4, 17, %12, 18
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+ %vcc = S_MOV_B64 0
+ %29, %vcc = V_ADDC_U32_e64 %19, %17, %vcc, implicit %exec
+ %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+ S_ENDPGM
+
+...
+
+---
+# GCN-LABEL: name: shrink_addc_undef_vcc{{$}}
+# GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit undef %vcc, implicit %exec
+# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+name: shrink_addc_undef_vcc
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_64_xexec }
+ - { id: 6, class: sreg_32 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_64 }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sgpr_64 }
+ - { id: 13, class: sgpr_128 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_64 }
+ - { id: 16, class: sgpr_128 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32 }
+ - { id: 23, class: sreg_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vreg_64 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vreg_64 }
+ - { id: 28, class: vreg_64 }
+ - { id: 29, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %3 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+ %27 = REG_SEQUENCE %3, 1, %26, 2
+ %10 = S_MOV_B32 61440
+ %11 = S_MOV_B32 0
+ %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
+ %13 = REG_SEQUENCE killed %5, 17, %12, 18
+ %28 = V_LSHL_B64 killed %27, 2, implicit %exec
+ %16 = REG_SEQUENCE killed %4, 17, %12, 18
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
+ %29, %vcc = V_ADDC_U32_e64 %19, %17, undef %vcc, implicit %exec
+ %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
+ S_ENDPGM
+
+...