[llvm] [AMDGPU] Allocate i1 argument to SGPRs (PR #72461)
Jun Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 15 18:47:47 PST 2023
https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/72461
>From 12b57bdf435fbef74bf1aac2d0d353ff808616d4 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Wed, 15 Nov 2023 19:48:41 -0600
Subject: [PATCH 1/2] [AMDGPU] Allocate i1 argument to SGPRs
Currently i1 arguments are passed as 32-bit VGPRs. It would make more
sense to make use of SGPRs and pass these values as a wavesize bool mask.
---
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 5 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 13 +++++
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 23 +++++++++
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 6 +++
llvm/test/CodeGen/AMDGPU/z_callee.ll | 33 ++++++++++++
llvm/test/CodeGen/AMDGPU/z_caller.ll | 43 ++++++++++++++++
llvm/test/CodeGen/AMDGPU/z_caller2.ll | 57 +++++++++++++++++++++
7 files changed, 179 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/z_callee.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/z_caller.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/z_caller2.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 9036b26a6f6bcb4..3f18cbd661d5b26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -185,9 +185,12 @@ def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;
// Calling convention for leaf functions
def CC_AMDGPU_Func : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
- CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
+ CCIfType<[i1] , CCAssignToReg<
+ !foreach(i, !range(0, 30), !cast<Register>("SGPR"#i)) // SGPR0-29
+ >>,
+
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
!foreach(i, !range(0, 30), !cast<Register>("SGPR"#i)) // SGPR0-29
>>>,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 35e252fe8d675ea..845a0b999768db7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3469,6 +3469,19 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
+  // In the code below (after the call to AnalyzeCallOperands),
+  // if (!Subtarget->enableFlatScratch()), either s[48:51] or s[0:3] would
+  // be used. Therefore, before calling AnalyzeCallOperands, we may need to
+ // reserve these registers.
+ if (!Subtarget->enableFlatScratch()) {
+ if (IsChainCallConv)
+ CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{
+ AMDGPU::SGPR48, AMDGPU::SGPR49, AMDGPU::SGPR50, AMDGPU::SGPR51}, 4);
+ else
+ CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{
+ AMDGPU::SGPR0, AMDGPU::SGPR1, AMDGPU::SGPR2, AMDGPU::SGPR3}, 4);
+ }
+
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5f78dfff1e98852..7de9194101d6abf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -852,6 +852,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+    // When the calling convention allocates an SGPR for an i1 argument, we may
+    // have an SGPR_64 to SReg_32 copy for an outgoing i1 argument. Adjust
+    // the copy to avoid an illegal copy.
+ if (AMDGPU::SGPR_64RegClass.contains(SrcReg)) {
+ auto sub0 = RI.getSubReg(SrcReg, AMDGPU::sub0);
+ if (sub0 != DestReg)
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg).addReg(sub0);
+ return;
+ }
+
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
@@ -885,6 +895,19 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ // When an i1 argument is allocated to an SGPR_32, we may have a COPY
+ // from SGPR_32 to SReg_64. The following handles this case to avoid
+ // an illegal copy.
+ if(AMDGPU::SGPR_32RegClass.contains(SrcReg)) {
+ auto sub0 = RI.getSubReg(DestReg, AMDGPU::sub0);
+ if (sub0 != SrcReg) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), sub0).addReg(SrcReg);
+ }
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32),
+ RI.getSubReg(DestReg, AMDGPU::sub1)).addImm(0);
+ return;
+ }
+
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 68c8f4024e73007..189333b063e5222 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -513,6 +513,12 @@ bool SILowerI1Copies::lowerCopiesFromI1() {
if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
continue;
+ // When the calling convention allocates i1 argument to SGPR,
+ // we may have a COPY with dst being an SGPR_32. This should
+ // not be lowered into V_CNDMASK_B32.
+ if(AMDGPU::SGPR_32RegClass.contains(DstReg))
+ continue;
+
Changed = true;
// Copy into a 32-bit vector register.
diff --git a/llvm/test/CodeGen/AMDGPU/z_callee.ll b/llvm/test/CodeGen/AMDGPU/z_callee.ll
new file mode 100644
index 000000000000000..2fc4befa279f3e4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/z_callee.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+define void @void_func_i1(i1 %arg0) #0 {
+; For CIGFX89, the i1 arg is passed in s4, but the v_cndmask insn uses s[4:5].
+; Therefore, the "s_mov_b32 s5, 0" is generated.
+;
+; CIGFX89-LABEL: void_func_i1:
+; CIGFX89: ; %bb.0:
+; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CIGFX89-NEXT: s_mov_b32 s5, 0
+; CIGFX89-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
+; CIGFX89-NEXT: s_mov_b32 s6, -1
+; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; CIGFX89-NEXT: s_waitcnt vmcnt(0)
+; CIGFX89-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: void_func_i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-NEXT: s_mov_b32 s2, -1
+; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ store i1 %arg0, ptr addrspace(1) undef
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/z_caller.ll b/llvm/test/CodeGen/AMDGPU/z_caller.ll
new file mode 100644
index 000000000000000..faf25e407fca2c6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/z_caller.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+
+declare hidden void @external_void_func_i1(i1) #0
+
+define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
+; GFX9-LABEL: test_call_external_void_func_i1_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s4, -1
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_getpc_b64 s[8:9]
+; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_i1 at rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_i1 at rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; GFX9-NEXT: s_endpgm
+;
+; GFX11-LABEL: test_call_external_void_func_i1_imm:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GFX11-NEXT: s_mov_b32 s0, -1
+; GFX11-NEXT: s_mov_b32 s32, 0
+; GFX11-NEXT: s_getpc_b64 s[2:3]
+; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1 at rel32@lo+4
+; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1 at rel32@hi+12
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX11-NEXT: s_endpgm
+ call void @external_void_func_i1(i1 true)
+ ret void
+}
+
+attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/z_caller2.ll b/llvm/test/CodeGen/AMDGPU/z_caller2.ll
new file mode 100644
index 000000000000000..e63ae50b7e91cd2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/z_caller2.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+
+declare hidden void @external_void_func_i1_signext(i1 signext) #0
+
+define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_i1_signext:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-NEXT: s_mov_b32 s2, -1
+; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_getpc_b64 s[8:9]
+; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_i1_signext at rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_i1_signext at rel32@hi+12
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; GFX9-NEXT: s_endpgm
+;
+; GFX11-LABEL: test_call_external_void_func_i1_signext:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-NEXT: s_mov_b32 s2, -1
+; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_mov_b32 s32, 0
+; GFX11-NEXT: s_getpc_b64 s[4:5]
+; GFX11-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext at rel32@lo+4
+; GFX11-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext at rel32@hi+12
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 1, v0
+; GFX11-NEXT: s_mov_b32 s0, s2
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX11-NEXT: s_endpgm
+ %var = load volatile i1, ptr addrspace(1) undef
+ call void @external_void_func_i1_signext(i1 signext %var)
+ ret void
+}
+
+
+
+attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
>From 3aa0909f824a38ea0e7ec57382e18147d8f52f5c Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Wed, 15 Nov 2023 20:37:27 -0600
Subject: [PATCH 2/2] Fix format.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 11 +++++++----
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 13 +++++++------
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 2 +-
3 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 845a0b999768db7..83e8bbcd2ab3ea5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3475,11 +3475,14 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// reserve these registers.
if (!Subtarget->enableFlatScratch()) {
if (IsChainCallConv)
- CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{
- AMDGPU::SGPR48, AMDGPU::SGPR49, AMDGPU::SGPR50, AMDGPU::SGPR51}, 4);
+ CCInfo.AllocateRegBlock(
+ ArrayRef<MCPhysReg>{AMDGPU::SGPR48, AMDGPU::SGPR49, AMDGPU::SGPR50,
+ AMDGPU::SGPR51},
+ 4);
else
- CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{
- AMDGPU::SGPR0, AMDGPU::SGPR1, AMDGPU::SGPR2, AMDGPU::SGPR3}, 4);
+ CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{AMDGPU::SGPR0, AMDGPU::SGPR1,
+ AMDGPU::SGPR2, AMDGPU::SGPR3},
+ 4);
}
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7de9194101d6abf..f44ff987f9ac374 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -867,7 +867,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -882,13 +882,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg == AMDGPU::VCC) {
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
- .addImm(0)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
return;
@@ -898,13 +898,14 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// When an i1 argument is allocated to an SGPR_32, we may have a COPY
// from SGPR_32 to SReg_64. The following handles this case to avoid
// an illegal copy.
- if(AMDGPU::SGPR_32RegClass.contains(SrcReg)) {
+ if (AMDGPU::SGPR_32RegClass.contains(SrcReg)) {
auto sub0 = RI.getSubReg(DestReg, AMDGPU::sub0);
if (sub0 != SrcReg) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), sub0).addReg(SrcReg);
}
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- RI.getSubReg(DestReg, AMDGPU::sub1)).addImm(0);
+ RI.getSubReg(DestReg, AMDGPU::sub1))
+ .addImm(0);
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 189333b063e5222..161ea3ddc0435c4 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -516,7 +516,7 @@ bool SILowerI1Copies::lowerCopiesFromI1() {
// When the calling convention allocates i1 argument to SGPR,
// we may have a COPY with dst being an SGPR_32. This should
// not be lowered into V_CNDMASK_B32.
- if(AMDGPU::SGPR_32RegClass.contains(DstReg))
+ if (AMDGPU::SGPR_32RegClass.contains(DstReg))
continue;
Changed = true;
More information about the llvm-commits
mailing list