[llvm] cc947e2 - [GlobalISel] Combine shr(shl x, c1), c2 to G_SBFX/G_UBFX
Dominik Montada via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 5 04:52:16 PDT 2021
Author: Dominik Montada
Date: 2021-08-05T13:52:10+02:00
New Revision: cc947e29ea0afa6efa506ffbf74115879be64c6d
URL: https://github.com/llvm/llvm-project/commit/cc947e29ea0afa6efa506ffbf74115879be64c6d
DIFF: https://github.com/llvm/llvm-project/commit/cc947e29ea0afa6efa506ffbf74115879be64c6d.diff
LOG: [GlobalISel] Combine shr(shl x, c1), c2 to G_SBFX/G_UBFX
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D107330
Added:
llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/arm64-rev.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 56459b68dce08..555be8be8885b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -541,6 +541,10 @@ class CombinerHelper {
bool matchBitfieldExtractFromAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+ /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
+ bool matchBitfieldExtractFromShr(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+
/// Reassociate pointer calculations with G_ADD involved, to allow better
/// addressing mode usage.
bool matchReassocPtrAdd(MachineInstr &MI,
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f35156d598492..13ebc43697c2f 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -652,8 +652,15 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
[{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def bitfield_extract_from_shr : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
- bitfield_extract_from_and]>;
+ bitfield_extract_from_and,
+ bitfield_extract_from_shr]>;
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_PTR_ADD):$root,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 715dce880d55d..c816f5b71b3d4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4128,6 +4128,55 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
return true;
}
+bool CombinerHelper::matchBitfieldExtractFromShr(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+
+ const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SBFX
+ : TargetOpcode::G_UBFX;
+
+ // Check if the type we would use for the extract is legal
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+ return false;
+
+ Register ShlSrc;
+ int64_t ShrAmt;
+ int64_t ShlAmt;
+ const unsigned Size = Ty.getScalarSizeInBits();
+
+ // Try to match shr (shl x, c1), c2
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ // Make sure that the shift sizes can fit a bitfield extract
+ if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+ return false;
+
+ // Skip this combine if the G_SEXT_INREG combine could handle it
+ if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
+ return false;
+
+ // Calculate start position and width of the extract
+ const int64_t Pos = ShrAmt - ShlAmt;
+ const int64_t Width = Size - ShrAmt;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
+ B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+ };
+ return true;
+}
+
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
MachineInstr &PtrAdd) {
assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir
new file mode 100644
index 0000000000000..3ed50d3c67fd9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir
@@ -0,0 +1,178 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we can fold a G_ASHR/G_LSHR fed by a G_SHL into a G_SBFX/G_UBFX.
+
+---
+name: apply_ashr_shl_to_sbfx
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
+ ; CHECK: $w0 = COPY [[SBFX]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 5
+ %2:_(s32) = G_CONSTANT i32 16
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_ASHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: apply_ashr_shl_to_sbfx_lower_bound
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_lower_bound
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C]]
+ ; CHECK: $w0 = COPY [[SBFX]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 30
+ %2:_(s32) = G_CONSTANT i32 31
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_ASHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: apply_ashr_shl_to_sbfx_upper_bound
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_upper_bound
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+ ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
+ ; CHECK: $w0 = COPY [[SBFX]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 1
+ %2:_(s32) = G_CONSTANT i32 31
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_ASHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: apply_lshr_shl_to_ubfx
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+ ; CHECK: $w0 = COPY [[UBFX]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 5
+ %2:_(s32) = G_CONSTANT i32 16
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_LSHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: apply_lshr_shl_to_ubfx_lower_bound
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_lower_bound
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C]](s32), [[C]]
+ ; CHECK: $w0 = COPY [[UBFX]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 30
+ %2:_(s32) = G_CONSTANT i32 31
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_LSHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: apply_lshr_shl_to_ubfx_upper_bound
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_upper_bound
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+ ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+ ; CHECK: $w0 = COPY [[UBFX]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 1
+ %2:_(s32) = G_CONSTANT i32 31
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_LSHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: dont_apply_pos_out_of_bounds
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: dont_apply_pos_out_of_bounds
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+ ; CHECK: $w0 = COPY [[ASHR]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 5
+ %2:_(s32) = G_CONSTANT i32 2
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_ASHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: dont_apply_no_constant
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: dont_apply_no_constant
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+ ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: $w0 = COPY [[ASHR]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = G_CONSTANT i32 2
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_ASHR %3, %2
+ $w0 = COPY %4(s32)
+...
+
+---
+name: dont_apply_more_than_one_use
+legalized: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: dont_apply_more_than_one_use
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+ ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SHL]], [[ASHR]]
+ ; CHECK: $w0 = COPY [[MUL]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 5
+ %2:_(s32) = G_CONSTANT i32 16
+ %3:_(s32) = G_SHL %0, %1
+ %4:_(s32) = G_ASHR %3, %2
+ %5:_(s32) = G_MUL %3, %4
+ $w0 = COPY %5(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 5393cdc8ab5da..cd9318882c532 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -156,8 +156,7 @@ define i64 @test_rev_x_srl32_shift(i64 %a) {
;
; GISEL-LABEL: test_rev_x_srl32_shift:
; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: lsl x8, x0, #33
-; GISEL-NEXT: lsr x8, x8, #35
+; GISEL-NEXT: ubfx x8, x0, #2, #29
; GISEL-NEXT: rev x8, x8
; GISEL-NEXT: lsr x0, x8, #32
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
index 85146308beb8c..a87e8f8e3a27a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -399,8 +399,7 @@ define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s0, s0, 31
-; GFX6-NEXT: s_lshr_b32 s0, s0, 31
+; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
@@ -1059,8 +1058,7 @@ define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s0, s0, 9
-; GFX6-NEXT: s_lshr_b32 s0, s0, 11
+; GFX6-NEXT: s_bfe_u32 s0, s0, 0x150002
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
More information about the llvm-commits
mailing list