[llvm] 9e2e493 - [AMDGPU] All GWS instructions need aligned VGPR on gfx90a
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 1 17:08:14 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-06-01T17:08:03-07:00
New Revision: 9e2e49328f19eeeab63c08721122815a27b2dad5
URL: https://github.com/llvm/llvm-project/commit/9e2e49328f19eeeab63c08721122815a27b2dad5
DIFF: https://github.com/llvm/llvm-project/commit/9e2e49328f19eeeab63c08721122815a27b2dad5.diff
LOG: [AMDGPU] All GWS instructions need aligned VGPR on gfx90a
Fixes: SWDEV-288006
Differential Revision: https://reviews.llvm.org/D103197
Added:
llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 2081f0f2b7f54..ff3a1bd13f0ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1390,7 +1390,24 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
if (HasVSrc) {
Register VSrc = MI.getOperand(1).getReg();
- MIB.addReg(VSrc);
+
+ if (STI.needsAlignedVGPRs()) {
+ // Add implicit aligned super-reg to force alignment on the data operand.
+ Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
+ Register NewVR =
+ MRI->createVirtualRegister(&AMDGPU::VReg_64_Align2RegClass);
+ BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), NewVR)
+ .addReg(VSrc, 0, MI.getOperand(1).getSubReg())
+ .addImm(AMDGPU::sub0)
+ .addReg(Undef)
+ .addImm(AMDGPU::sub1);
+ MIB.addReg(NewVR, 0, AMDGPU::sub0);
+ MIB.addReg(NewVR, RegState::Implicit);
+ } else {
+ MIB.addReg(VSrc);
+ }
+
if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e7b1bd580ade7..8833f18737089 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4222,11 +4222,35 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
case AMDGPU::DS_GWS_INIT:
- case AMDGPU::DS_GWS_SEMA_V:
case AMDGPU::DS_GWS_SEMA_BR:
+ case AMDGPU::DS_GWS_BARRIER:
+ if (Subtarget->needsAlignedVGPRs()) {
+ // Add implicit aligned super-reg to force alignment on the data operand.
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
+ Register DataReg = Op->getReg();
+ bool IsAGPR = TRI->isAGPR(MRI, DataReg);
+ Register Undef = MRI.createVirtualRegister(
+ IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), Undef);
+ Register NewVR =
+ MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
+ : &AMDGPU::VReg_64_Align2RegClass);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), NewVR)
+ .addReg(DataReg, 0, Op->getSubReg())
+ .addImm(AMDGPU::sub0)
+ .addReg(Undef)
+ .addImm(AMDGPU::sub1);
+ Op->setReg(NewVR);
+ Op->setSubReg(AMDGPU::sub0);
+ MI.addOperand(MachineOperand::CreateReg(NewVR, false, true));
+ }
+ LLVM_FALLTHROUGH;
+ case AMDGPU::DS_GWS_SEMA_V:
case AMDGPU::DS_GWS_SEMA_P:
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
- case AMDGPU::DS_GWS_BARRIER:
// A s_waitcnt 0 is required to be the instruction immediately following.
if (getSubtarget()->hasGWSAutoReplay()) {
bundleInstWithWaitcnt(MI);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 342667d566a4f..321a68bf9a47d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4344,6 +4344,28 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (ST.needsAlignedVGPRs() &&
+ (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
+ MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
+ MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) {
+ const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0);
+ Register Reg = Op->getReg();
+ bool Aligned = true;
+ if (Reg.isPhysical()) {
+ Aligned = !(RI.getHWRegIndex(Reg) & 1);
+ } else {
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
+ Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
+ !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
+ }
+
+ if (!Aligned) {
+ ErrInfo = "Subtarget requires even aligned vector registers "
+ "for DS_GWS instructions";
+ return false;
+ }
+ }
+
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll b/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
new file mode 100644
index 0000000000000..b99d524d38daf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
+
+; GCN-LABEL: {{^}}gws_init_odd_reg:
+; GFX908-DAG: ds_gws_init v1 gds
+; GFX90A-DAG: ds_gws_init v2 gds
+; GCN-DAG: ds_gws_init v0 gds
+define amdgpu_ps void @gws_init_odd_reg(<2 x i32> %arg) {
+ %vgpr.0 = extractelement <2 x i32> %arg, i32 0
+ %vgpr.1 = extractelement <2 x i32> %arg, i32 1
+ call void @llvm.amdgcn.ds.gws.init(i32 %vgpr.0, i32 0)
+ call void @llvm.amdgcn.ds.gws.init(i32 %vgpr.1, i32 0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}gws_sema_br_odd_reg:
+; GFX908-DAG: ds_gws_sema_br v1 gds
+; GFX90A-DAG: ds_gws_sema_br v2 gds
+; GCN-DAG: ds_gws_sema_br v0 gds
+define amdgpu_ps void @gws_sema_br_odd_reg(<2 x i32> %arg) {
+ %vgpr.0 = extractelement <2 x i32> %arg, i32 0
+ %vgpr.1 = extractelement <2 x i32> %arg, i32 1
+ call void @llvm.amdgcn.ds.gws.sema.br(i32 %vgpr.0, i32 0)
+ call void @llvm.amdgcn.ds.gws.sema.br(i32 %vgpr.1, i32 0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}gws_barrier_odd_reg:
+; GFX908-DAG: ds_gws_barrier v1 gds
+; GFX90A-DAG: ds_gws_barrier v2 gds
+; GCN-DAG: ds_gws_barrier v0 gds
+define amdgpu_ps void @gws_barrier_odd_reg(<2 x i32> %arg) {
+ %vgpr.0 = extractelement <2 x i32> %arg, i32 0
+ %vgpr.1 = extractelement <2 x i32> %arg, i32 1
+ call void @llvm.amdgcn.ds.gws.barrier(i32 %vgpr.0, i32 0)
+ call void @llvm.amdgcn.ds.gws.barrier(i32 %vgpr.1, i32 0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}gws_init_odd_agpr:
+; GFX908-COUNT-2: ds_gws_init v{{[0-9]+}} gds
+; GFX90A-COUNT-2: ds_gws_init {{[va][0-9]?[02468]}} gds
+define amdgpu_ps void @gws_init_odd_agpr(<4 x i32> %arg) {
+bb:
+ %mai = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
+ %agpr.0 = extractelement <4 x i32> %mai, i32 0
+ %agpr.1 = extractelement <4 x i32> %mai, i32 1
+ call void @llvm.amdgcn.ds.gws.init(i32 %agpr.0, i32 0)
+ call void @llvm.amdgcn.ds.gws.init(i32 %agpr.1, i32 0)
+ ret void
+}
+
+declare void @llvm.amdgcn.ds.gws.init(i32, i32)
+declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32)
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32)
+declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir
new file mode 100644
index 0000000000000..c41bf27288261
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir
@@ -0,0 +1,37 @@
+# RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX90A-ERR %s
+
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT killed %0.sub1:areg_128_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT killed %0.sub3:areg_128_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_SEMA_BR killed %1.sub1:vreg_64_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_BARRIER killed %2.sub0:vreg_64, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT killed %3:vgpr_32, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+---
+name: gws_odd_vgpr
+body: |
+ bb.0:
+ %0:areg_128_align2 = IMPLICIT_DEF
+ DS_GWS_INIT killed %0.sub1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+ %0:areg_128_align2 = IMPLICIT_DEF
+ DS_GWS_INIT killed %0.sub3, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ DS_GWS_SEMA_BR killed %1.sub1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+ %2:vreg_64 = IMPLICIT_DEF
+ DS_GWS_BARRIER killed %2.sub0, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+ %3:vgpr_32 = IMPLICIT_DEF
+ DS_GWS_INIT killed %3, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+ $vgpr1 = IMPLICIT_DEF
+ DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+ $agpr1 = IMPLICIT_DEF
+ DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+ S_ENDPGM 0
+
+...
More information about the llvm-commits mailing list