[PATCH] D116803: [GlobalISel] Use getPreferredShiftAmountTy in one more G_UBFX combine
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 7 05:03:44 PST 2022
foad created this revision.
foad added reviewers: bcahoon, aemerson, arsenm, paquette.
Herald added subscribers: kerbowa, hiraditya, tpr, rovka, nhaehnle, jvesely.
foad requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
Change CombinerHelper::matchBitfieldExtractFromShrAnd to use
getPreferredShiftAmountTy for the shift-amount-like operands of G_UBFX
(the position and width constants), just like all the other G_[SU]BFX
combines do. This better matches the AMDGPU legality rules for these
instructions.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D116803
Files:
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
@@ -121,22 +121,24 @@
}
; Test vector bitfield extract for 64-bits.
+; TODO: No need for a 64-bit shift instruction when the extracted value is
+; entirely contained within the upper or lower half.
define i64 @v_mask_srl_i64(i64 %value) {
; GFX89-LABEL: v_mask_srl_i64:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_and_b32_e32 v0, 0xfe000000, v0
-; GFX89-NEXT: v_and_b32_e32 v1, 7, v1
; GFX89-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GFX89-NEXT: v_mov_b32_e32 v1, 0
+; GFX89-NEXT: v_bfe_u32 v0, v0, 0, 10
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mask_srl_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_and_b32_e32 v0, 0xfe000000, v0
-; GFX10-NEXT: v_and_b32_e32 v1, 7, v1
; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10
; GFX10-NEXT: s_setpc_b64 s[30:31]
%1 = and i64 %value, 34326183936 ; 1023 << 25
%2 = lshr i64 %1, 25
@@ -147,10 +149,7 @@
define amdgpu_ps i64 @s_mask_srl_i64(i64 inreg %value) {
; GCN-LABEL: s_mask_srl_i64:
; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s2, 0xfe000000
-; GCN-NEXT: s_mov_b32 s3, 7
-; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
-; GCN-NEXT: s_lshr_b64 s[0:1], s[0:1], 25
+; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0xa0019
; GCN-NEXT: ; return to shader part epilog
%1 = and i64 %value, 34326183936 ; 1023 << 25
%2 = lshr i64 %1, 25
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
@@ -145,11 +145,10 @@
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 261888
- ; GCN-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C1]](s32)
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+ ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[UBFX]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_CONSTANT i64 261888 ; 1023 << 8
%2:_(s64) = G_AND %0, %1
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4214,8 +4214,9 @@
const Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
- TargetOpcode::G_UBFX, Ty, Ty))
+ TargetOpcode::G_UBFX, Ty, ExtractTy))
return false;
// Try to match shr (and x, c1), c2
@@ -4249,8 +4250,8 @@
return false;
MatchInfo = [=](MachineIRBuilder &B) {
- auto WidthCst = B.buildConstant(Ty, Width);
- auto PosCst = B.buildConstant(Ty, Pos);
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
};
return true;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D116803.398107.patch
Type: text/x-patch
Size: 3840 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220107/33d20b88/attachment.bin>
More information about the llvm-commits
mailing list