[llvm] 9e2e493 - [AMDGPU] All GWS instructions need aligned VGPR on gfx90a

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 1 17:08:14 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-06-01T17:08:03-07:00
New Revision: 9e2e49328f19eeeab63c08721122815a27b2dad5

URL: https://github.com/llvm/llvm-project/commit/9e2e49328f19eeeab63c08721122815a27b2dad5
DIFF: https://github.com/llvm/llvm-project/commit/9e2e49328f19eeeab63c08721122815a27b2dad5.diff

LOG: [AMDGPU] All GWS instructions need aligned VGPR on gfx90a

Fixes: SWDEV-288006

Differential Revision: https://reviews.llvm.org/D103197

Added: 
    llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
    llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 2081f0f2b7f54..ff3a1bd13f0ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1390,7 +1390,24 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
 
   if (HasVSrc) {
     Register VSrc = MI.getOperand(1).getReg();
-    MIB.addReg(VSrc);
+
+    if (STI.needsAlignedVGPRs()) {
+      // Add implicit aligned super-reg to force alignment on the data operand.
+      Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
+      Register NewVR =
+          MRI->createVirtualRegister(&AMDGPU::VReg_64_Align2RegClass);
+      BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), NewVR)
+          .addReg(VSrc, 0, MI.getOperand(1).getSubReg())
+          .addImm(AMDGPU::sub0)
+          .addReg(Undef)
+          .addImm(AMDGPU::sub1);
+      MIB.addReg(NewVR, 0, AMDGPU::sub0);
+      MIB.addReg(NewVR, RegState::Implicit);
+    } else {
+      MIB.addReg(VSrc);
+    }
+
     if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
       return false;
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e7b1bd580ade7..8833f18737089 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4222,11 +4222,35 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     return BB;
   }
   case AMDGPU::DS_GWS_INIT:
-  case AMDGPU::DS_GWS_SEMA_V:
   case AMDGPU::DS_GWS_SEMA_BR:
+  case AMDGPU::DS_GWS_BARRIER:
+    if (Subtarget->needsAlignedVGPRs()) {
+      // Add implicit aligned super-reg to force alignment on the data operand.
+      const DebugLoc &DL = MI.getDebugLoc();
+      MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+      const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+      MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
+      Register DataReg = Op->getReg();
+      bool IsAGPR = TRI->isAGPR(MRI, DataReg);
+      Register Undef = MRI.createVirtualRegister(
+          IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), Undef);
+      Register NewVR =
+          MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
+                                           : &AMDGPU::VReg_64_Align2RegClass);
+      BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), NewVR)
+          .addReg(DataReg, 0, Op->getSubReg())
+          .addImm(AMDGPU::sub0)
+          .addReg(Undef)
+          .addImm(AMDGPU::sub1);
+      Op->setReg(NewVR);
+      Op->setSubReg(AMDGPU::sub0);
+      MI.addOperand(MachineOperand::CreateReg(NewVR, false, true));
+    }
+    LLVM_FALLTHROUGH;
+  case AMDGPU::DS_GWS_SEMA_V:
   case AMDGPU::DS_GWS_SEMA_P:
   case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
-  case AMDGPU::DS_GWS_BARRIER:
     // A s_waitcnt 0 is required to be the instruction immediately following.
     if (getSubtarget()->hasGWSAutoReplay()) {
       bundleInstWithWaitcnt(MI);

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 342667d566a4f..321a68bf9a47d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4344,6 +4344,28 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     }
   }
 
+  if (ST.needsAlignedVGPRs() &&
+      (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
+       MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
+       MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) {
+    const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0);
+    Register Reg = Op->getReg();
+    bool Aligned = true;
+    if (Reg.isPhysical()) {
+      Aligned = !(RI.getHWRegIndex(Reg) & 1);
+    } else {
+      const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
+      Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
+                !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
+    }
+
+    if (!Aligned) {
+      ErrInfo = "Subtarget requires even aligned vector registers "
+                "for DS_GWS instructions";
+      return false;
+    }
+  }
+
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll b/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
new file mode 100644
index 0000000000000..b99d524d38daf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
+
+; GCN-LABEL: {{^}}gws_init_odd_reg:
+; GFX908-DAG: ds_gws_init v1 gds
+; GFX90A-DAG: ds_gws_init v2 gds
+; GCN-DAG:    ds_gws_init v0 gds
+define amdgpu_ps void @gws_init_odd_reg(<2 x i32> %arg) {
+  %vgpr.0 = extractelement <2 x i32> %arg, i32 0
+  %vgpr.1 = extractelement <2 x i32> %arg, i32 1
+  call void @llvm.amdgcn.ds.gws.init(i32 %vgpr.0, i32 0)
+  call void @llvm.amdgcn.ds.gws.init(i32 %vgpr.1, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}gws_sema_br_odd_reg:
+; GFX908-DAG: ds_gws_sema_br v1 gds
+; GFX90A-DAG: ds_gws_sema_br v2 gds
+; GCN-DAG:    ds_gws_sema_br v0 gds
+define amdgpu_ps void @gws_sema_br_odd_reg(<2 x i32> %arg) {
+  %vgpr.0 = extractelement <2 x i32> %arg, i32 0
+  %vgpr.1 = extractelement <2 x i32> %arg, i32 1
+  call void @llvm.amdgcn.ds.gws.sema.br(i32 %vgpr.0, i32 0)
+  call void @llvm.amdgcn.ds.gws.sema.br(i32 %vgpr.1, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}gws_barrier_odd_reg:
+; GFX908-DAG: ds_gws_barrier v1 gds
+; GFX90A-DAG: ds_gws_barrier v2 gds
+; GCN-DAG:    ds_gws_barrier v0 gds
+define amdgpu_ps void @gws_barrier_odd_reg(<2 x i32> %arg) {
+  %vgpr.0 = extractelement <2 x i32> %arg, i32 0
+  %vgpr.1 = extractelement <2 x i32> %arg, i32 1
+  call void @llvm.amdgcn.ds.gws.barrier(i32 %vgpr.0, i32 0)
+  call void @llvm.amdgcn.ds.gws.barrier(i32 %vgpr.1, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}gws_init_odd_agpr:
+; GFX908-COUNT-2: ds_gws_init v{{[0-9]+}} gds
+; GFX90A-COUNT-2: ds_gws_init {{[va][0-9]?[02468]}} gds
+define amdgpu_ps void @gws_init_odd_agpr(<4 x i32> %arg) {
+bb:
+  %mai = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
+  %agpr.0 = extractelement <4 x i32> %mai, i32 0
+  %agpr.1 = extractelement <4 x i32> %mai, i32 1
+  call void @llvm.amdgcn.ds.gws.init(i32 %agpr.0, i32 0)
+  call void @llvm.amdgcn.ds.gws.init(i32 %agpr.1, i32 0)
+  ret void
+}
+
+declare void @llvm.amdgcn.ds.gws.init(i32, i32)
+declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32)
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32)
+declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)

diff  --git a/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir
new file mode 100644
index 0000000000000..c41bf27288261
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir
@@ -0,0 +1,37 @@
+# RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX90A-ERR %s
+
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT killed %0.sub1:areg_128_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT killed %0.sub3:areg_128_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_SEMA_BR killed %1.sub1:vreg_64_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_BARRIER killed %2.sub0:vreg_64, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT killed %3:vgpr_32, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions ***
+# GFX90A-ERR: DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+---
+name:            gws_odd_vgpr
+body:             |
+  bb.0:
+    %0:areg_128_align2 = IMPLICIT_DEF
+    DS_GWS_INIT killed %0.sub1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+    %0:areg_128_align2 = IMPLICIT_DEF
+    DS_GWS_INIT killed %0.sub3, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    DS_GWS_SEMA_BR killed %1.sub1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+    %2:vreg_64 = IMPLICIT_DEF
+    DS_GWS_BARRIER killed %2.sub0, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+    %3:vgpr_32 = IMPLICIT_DEF
+    DS_GWS_INIT killed %3, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+    $vgpr1 = IMPLICIT_DEF
+    DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+    $agpr1 = IMPLICIT_DEF
+    DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource")
+    S_ENDPGM 0
+
+...


        


More information about the llvm-commits mailing list