[llvm] 48ab3e7 - [AMDGPU] Avoid SCC clobbering before S_CSELECT_B32
Alexander Timofeev via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 22 10:08:18 PST 2022
Author: Alexander Timofeev
Date: 2022-11-22T19:08:04+01:00
New Revision: 48ab3e75279a7a7d0221e9dc3b60c59cdf41330d
URL: https://github.com/llvm/llvm-project/commit/48ab3e75279a7a7d0221e9dc3b60c59cdf41330d
DIFF: https://github.com/llvm/llvm-project/commit/48ab3e75279a7a7d0221e9dc3b60c59cdf41330d.diff
LOG: [AMDGPU] Avoid SCC clobbering before S_CSELECT_B32
Frame lowering inserts a scalar addition to compute the offset to the
stack objects. This instruction is inserted at an arbitrary place and may
clobber SCC between its definition and the S_CSELECT_B32 instruction. This
change works around this particular code pattern. It queries the scavenger
for an SGPR and, if one is available, saves SCC to it and restores its value
after the frame lowering code has been inserted.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D136169
Added:
llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0f10aceb9888..d031c2808adc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2204,6 +2204,9 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
return false;
}
+ bool NeedSaveSCC =
+ RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
+
Register TmpSReg =
UseSGPR ? TmpReg
: RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
@@ -2221,9 +2224,22 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
FIOp.setIsKill(false);
}
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
- .addReg(FrameReg)
- .addImm(Offset);
+ if (NeedSaveSCC) {
+ assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+ .addImm(0)
+ .addReg(TmpSReg);
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ }
if (!UseSGPR)
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
@@ -2231,10 +2247,25 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (TmpSReg == FrameReg) {
// Undo frame register modification.
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
- FrameReg)
- .addReg(FrameReg)
- .addImm(-Offset);
+ if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
+ MachineBasicBlock::iterator I =
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
+ TmpSReg)
+ .addReg(FrameReg)
+ .addImm(-Offset);
+ I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
+ TmpSReg)
+ .addImm(0)
+ .addReg(TmpSReg);
+ } else {
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
+ FrameReg)
+ .addReg(FrameReg)
+ .addImm(-Offset);
+ }
}
return false;
diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
new file mode 100644
index 000000000000..1a0882065a2a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=MUBUF %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog -mattr=+enable-flat-scratch %s -o - | FileCheck --check-prefix=FLATSCR %s
+
+---
+name: use_restore_frame_reg
+tracksRegLiveness: true
+
+stack:
+ - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 2, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 3, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 4, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 5, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 6, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 7, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 8, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 9, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 10, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 11, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 12, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 13, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 14, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 15, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 16, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 17, type: default, offset: 0, size: 4, alignment: 8192 }
+ - { id: 18, type: default, offset: 0, size: 4, alignment: 8192 }
+
+machineFunctionInfo:
+ isEntryFunction: false
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ frameOffsetReg: $sgpr33
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ ; MUBUF-LABEL: name: use_restore_frame_reg
+ ; MUBUF: bb.0:
+ ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; MUBUF-NEXT: liveins: $vgpr1, $vgpr2
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 9961728, implicit-def dead $scc
+ ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5)
+ ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+ ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+ ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+ ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc
+ ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
+ ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
+ ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
+ ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 155648, killed $vgpr3, implicit $exec
+ ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; MUBUF-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+ ; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: bb.1:
+ ; MUBUF-NEXT: successors: %bb.2(0x80000000)
+ ; MUBUF-NEXT: liveins: $vgpr2
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: S_NOP 0
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: bb.2:
+ ; MUBUF-NEXT: liveins: $vgpr2
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -11010048, implicit-def dead $scc
+ ; MUBUF-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
+ ; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 9961728, implicit-def dead $scc
+ ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5)
+ ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; MUBUF-NEXT: S_ENDPGM 0
+ ; FLATSCR-LABEL: name: use_restore_frame_reg
+ ; FLATSCR: bb.0:
+ ; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; FLATSCR-NEXT: liveins: $vgpr1, $vgpr2
+ ; FLATSCR-NEXT: {{ $}}
+ ; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 155652, implicit-def dead $scc
+ ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5)
+ ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+ ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+ ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc
+ ; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
+ ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
+ ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
+ ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -8192, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
+ ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
+ ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -155648, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
+ ; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+ ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; FLATSCR-NEXT: {{ $}}
+ ; FLATSCR-NEXT: bb.1:
+ ; FLATSCR-NEXT: successors: %bb.2(0x80000000)
+ ; FLATSCR-NEXT: liveins: $vgpr2
+ ; FLATSCR-NEXT: {{ $}}
+ ; FLATSCR-NEXT: S_NOP 0
+ ; FLATSCR-NEXT: {{ $}}
+ ; FLATSCR-NEXT: bb.2:
+ ; FLATSCR-NEXT: liveins: $vgpr2
+ ; FLATSCR-NEXT: {{ $}}
+ ; FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -172032, implicit-def dead $scc
+ ; FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
+ ; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 155652, implicit-def dead $scc
+ ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.20, addrspace 5)
+ ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; FLATSCR-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr1
+
+ S_CMP_EQ_U32 0, 0, implicit-def $scc
+ S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
+ $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
+ $vgpr0 = V_OR_B32_e32 %stack.18, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index 2c4d9f009447..2a0f6d1d79e9 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -1085,7 +1085,9 @@ body: |
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
- ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr32, 8200, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+ ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi
; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
; GFX9-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GFX9-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
@@ -1102,7 +1104,9 @@ body: |
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
- ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADD_I32 $sgpr32, 8200, implicit-def $scc
+ ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+ ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc
+ ; GFX10-FLATSCR-NEXT: $vcc_lo = S_BITSET0_B32 0, $vcc_lo
; GFX10-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
; GFX10-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GFX10-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
@@ -1187,7 +1191,9 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
- ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr32, 8200, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+ ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi
; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
; GFX9-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GFX9-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
@@ -1208,7 +1214,9 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
- ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADD_I32 $sgpr32, 8200, implicit-def $scc
+ ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+ ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc
+ ; GFX10-FLATSCR-NEXT: $vcc_lo = S_BITSET0_B32 0, $vcc_lo
; GFX10-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
; GFX10-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GFX10-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
@@ -1295,7 +1303,9 @@ body: |
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
- ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr32, 8200, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+ ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi
; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
; GFX9-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -1313,7 +1323,9 @@ body: |
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
- ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADD_I32 $sgpr32, 8200, implicit-def $scc
+ ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+ ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc
+ ; GFX10-FLATSCR-NEXT: $vcc_lo = S_BITSET0_B32 0, $vcc_lo
; GFX10-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
; GFX10-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
; GFX10-FLATSCR-NEXT: {{ $}}
More information about the llvm-commits
mailing list