[PATCH] D45826: AMDGPU: Legalize the operand of SI_INIT_M0

Thu Apr 19 10:25:14 PDT 2018

nhaehnle created this revision.
nhaehnle added reviewers: arsenm, rampitec.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, kzhuravl.

This fixes a case where the argument to a sendmsg intrinsic
ends up in a VGPR, for whatever reason.

The underlying performance issue is that a multiplication that
can be an s_mul_i32 is instead needlessly generated as
v_mul_u32_u24, but this is not addressed by this patch.

Change-Id: I61fd4034314d5acdf6074632c30b65364dfa7328


Repository:
  rL LLVM

https://reviews.llvm.org/D45826

Files:
  lib/Target/AMDGPU/SIInstrInfo.cpp
  test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll


Index: test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
===================================================================

--- test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
@@ -121,6 +121,21 @@
   ret void
 }
 
+; TODO: This should use s_mul_i32 instead of v_mul_u32_u24 + v_readfirstlane!
+;
+; GCN-LABEL: {{^}}test_mul24:
+; GCN: v_mul_u32_u24_e32
+; GCN: v_readfirstlane_b32
+; GCN: s_mov_b32 m0,
+; GCN: s_sendmsg sendmsg(MSG_INTERRUPT)
+define amdgpu_gs void @test_mul24(i32 inreg %arg) {
+body:
+  %tmp1 = and i32 %arg, 511
+  %tmp2 = mul nuw nsw i32 %tmp1, 12288
+  call void @llvm.amdgcn.s.sendmsg(i32 1, i32 %tmp2)
+  ret void
+}
+
 declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
 declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
 
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3285,6 +3285,13 @@
   unsigned DstReg = MRI.createVirtualRegister(SRC);
   unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
 
+  if (SubRegs == 1) {
+    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
+            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
+        .addReg(SrcReg);
+    return DstReg;
+  }
+
   SmallVector<unsigned, 8> SRegs;
   for (unsigned i = 0; i < SubRegs; ++i) {
     unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -3462,6 +3469,14 @@
     return;
   }
 
+  // Legalize SI_INIT_M0
+  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
+    MachineOperand &Src = MI.getOperand(0);
+    if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
+      Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
+    return;
+  }
+
   // Legalize MIMG and MUBUF/MTBUF for shaders.
   //
   // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45826.143122.patch
Type: text/x-patch
Size: 1889 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180419/1bede290/attachment.bin>