[PATCH] D117720: AMDGPU/GlobalISel: Do not create readfirstlane with non-s32 type
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 21 20:25:51 PST 2022
arsenm updated this revision to Diff 477054.
arsenm added a comment.
Fix v2i16 handling.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D117720/new/
https://reviews.llvm.org/D117720
Files:
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=irtranslator %s -o - | FileCheck %s
; Check that we correctly skip over disabled inputs
define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 {
@@ -109,6 +109,49 @@
ret { i32, i32 } %value
}
+define amdgpu_ps i8 addrspace(3)* @sgpr_return_p3i8(i8 addrspace(3)* %vgpr) {
+ ; CHECK-LABEL: name: sgpr_return_p3i8
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ret i8 addrspace(3)* %vgpr
+}
+
+define amdgpu_ps i8 addrspace(1)* @sgpr_return_p1i8(i8 addrspace(1)* %vgpr) {
+ ; CHECK-LABEL: name: sgpr_return_p1i8
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+ ret i8 addrspace(1)* %vgpr
+}
+
+define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
+ ; CHECK-LABEL: name: sgpr_return_v2i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ret <2 x i16> %vgpr
+}
+
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
attributes #0 = { nounwind }
Index: llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -99,7 +99,7 @@
def RetCC_SI_Shader : CallingConv<[
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
- CCIfType<[i32, i16] , CCAssignToReg<[
+ CCIfType<[i32, i16, v2i16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -70,6 +70,18 @@
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
if (TRI->isSGPRReg(MRI, PhysReg)) {
+ LLT Ty = MRI.getType(ExtReg);
+ LLT S32 = LLT::scalar(32);
+ if (Ty != S32) {
+ // FIXME: We should probably support readfirstlane intrinsics with all
+ // legal 32-bit types.
+ assert(Ty.getSizeInBits() == 32);
+ if (Ty.isPointer())
+ ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
+ else
+ ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
+ }
+
auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
{MRI.getType(ExtReg)}, false)
.addReg(ExtReg);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D117720.477054.patch
Type: text/x-patch
Size: 4660 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221122/9be0b492/attachment.bin>
More information about the llvm-commits mailing list