[PATCH] D117720: AMDGPU/GlobalISel: Do not create readfirstlane with non-s32 type

Wed Jan 19 13:37:31 PST 2022

arsenm created this revision.
arsenm added reviewers: AMDGPU, foad, mbrkusanin, Petar.Avramovic.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, rovka, yaxunl, nhaehnle, jvesely, kzhuravl.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.

We should probably handle any 32-bit type here, but the intrinsic
definition and selection pattern currently do not. Avoids a few lit
tests failures when switched on by default.


https://reviews.llvm.org/D117720

Files:
  llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
  llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll


Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=irtranslator %s -o - | FileCheck %s
 
 ; Check that we correctly skip over disabled inputs
 define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 {
@@ -109,6 +109,40 @@
   ret { i32, i32 } %value
 }
 
+define amdgpu_ps i8 addrspace(3)* @sgpr_return_p3i8(i8 addrspace(3)* %vgpr) {
+  ; CHECK-LABEL: name: sgpr_return_p3i8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
+  ret i8 addrspace(3)* %vgpr
+}
+
+define amdgpu_ps i8 addrspace(1)* @sgpr_return_p1i8(i8 addrspace(1)* %vgpr) {
+  ; CHECK-LABEL: name: sgpr_return_p1i8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+  ; CHECK-NEXT:   $sgpr1 = COPY [[INT1]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ret i8 addrspace(1)* %vgpr
+}
+
+; define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
+;   ret <2 x i16> %vgpr
+; }
+
 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)  #0
 
 attributes #0 = { nounwind }
Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -69,6 +69,18 @@
     const SIRegisterInfo *TRI
       = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
     if (TRI->isSGPRReg(MRI, PhysReg)) {
+      LLT Ty = MRI.getType(ExtReg);
+      LLT S32 = LLT::scalar(32);
+      if (Ty != S32) {
+        // FIXME: We should probably support readfirstlane intrinsics with all
+        // legal 32-bit types.
+        assert(Ty.getSizeInBits() == 32);
+        if (Ty.isPointer())
+          ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
+        else
+          ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
+      }
+
       auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                               {MRI.getType(ExtReg)}, false)
         .addReg(ExtReg);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D117720.401389.patch
Type: text/x-patch
Size: 3562 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220119/8702ff89/attachment-0001.bin>