[PATCH] D74987: AMDGPU/GlobalISel: Better code for one case of G_SHUFFLE_VECTOR on v2i16
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 13:25:56 PST 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rGb72f1448ce42: AMDGPU/GlobalISel: Better code for one case of G_SHUFFLE_VECTOR on v2i16 (authored by foad).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D74987/new/
https://reviews.llvm.org/D74987
Files:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
@@ -56,8 +56,8 @@
; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_0
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
- ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
+ ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+ ; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
@@ -143,8 +143,8 @@
; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_2
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
- ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
+ ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+ ; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)
@@ -424,8 +424,8 @@
; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_0
; GFX9: liveins: $sgpr0, $sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
- ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
+ ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
+ ; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:sgpr(<2 x s16>) = COPY $sgpr1
%2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
@@ -511,8 +511,8 @@
; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_2
; GFX9: liveins: $sgpr0, $sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
- ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
+ ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
+ ; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:sgpr(<2 x s16>) = COPY $sgpr1
%2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2191,7 +2191,17 @@
.addReg(SrcVec)
.addImm(16);
}
- } else if (isZeroOrUndef(Mask[0]) && Mask[1] == 0) {
+ } else if (Mask[0] == -1 && Mask[1] == 0) {
+ if (IsVALU) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), DstReg)
+ .addImm(16)
+ .addReg(SrcVec);
+ } else {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHL_B32), DstReg)
+ .addReg(SrcVec)
+ .addImm(16);
+ }
+ } else if (Mask[0] == 0 && Mask[1] == 0) {
if (IsVALU) {
// Write low half of the register into the high half.
MachineInstr *MovSDWA =
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D74987.245979.patch
Type: text/x-patch
Size: 3626 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200221/4e2e99bd/attachment.bin>
More information about the llvm-commits mailing list