[llvm] cc947e2 - [GlobalISel] Combine shr(shl x, c1), c2 to G_SBFX/G_UBFX

Dominik Montada via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 5 04:52:16 PDT 2021


Author: Dominik Montada
Date: 2021-08-05T13:52:10+02:00
New Revision: cc947e29ea0afa6efa506ffbf74115879be64c6d

URL: https://github.com/llvm/llvm-project/commit/cc947e29ea0afa6efa506ffbf74115879be64c6d
DIFF: https://github.com/llvm/llvm-project/commit/cc947e29ea0afa6efa506ffbf74115879be64c6d.diff

LOG: [GlobalISel] Combine shr(shl x, c1), c2 to G_SBFX/G_UBFX

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D107330

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/test/CodeGen/AArch64/arm64-rev.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 56459b68dce08..555be8be8885b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -541,6 +541,10 @@ class CombinerHelper {
   bool matchBitfieldExtractFromAnd(
       MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
+  bool matchBitfieldExtractFromShr(
+      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Reassociate pointer calculations with G_ADD involved, to allow better
   /// addressing mode usage.
   bool matchReassocPtrAdd(MachineInstr &MI,

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f35156d598492..13ebc43697c2f 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -652,8 +652,15 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
     [{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def bitfield_extract_from_shr : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+    [{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
 def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
-                                            bitfield_extract_from_and]>;
+                                            bitfield_extract_from_and,
+                                            bitfield_extract_from_shr]>;
 def reassoc_ptradd : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (wip_match_opcode G_PTR_ADD):$root,

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 715dce880d55d..c816f5b71b3d4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4128,6 +4128,55 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
   return true;
 }
 
+bool CombinerHelper::matchBitfieldExtractFromShr(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  const unsigned Opcode = MI.getOpcode();
+  assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+  const Register Dst = MI.getOperand(0).getReg();
+
+  const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+                                  ? TargetOpcode::G_SBFX
+                                  : TargetOpcode::G_UBFX;
+
+  // Check if the type we would use for the extract is legal
+  LLT Ty = MRI.getType(Dst);
+  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+  if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+    return false;
+
+  Register ShlSrc;
+  int64_t ShrAmt;
+  int64_t ShlAmt;
+  const unsigned Size = Ty.getScalarSizeInBits();
+
+  // Try to match shr (shl x, c1), c2
+  if (!mi_match(Dst, MRI,
+                m_BinOp(Opcode,
+                        m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+                        m_ICst(ShrAmt))))
+    return false;
+
+  // Make sure that the shift sizes can fit a bitfield extract
+  if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+    return false;
+
+  // Skip this combine if the G_SEXT_INREG combine could handle it
+  if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
+    return false;
+
+  // Calculate start position and width of the extract
+  const int64_t Pos = ShrAmt - ShlAmt;
+  const int64_t Width = Size - ShrAmt;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto WidthCst = B.buildConstant(ExtractTy, Width);
+    auto PosCst = B.buildConstant(ExtractTy, Pos);
+    B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+  };
+  return true;
+}
+
 bool CombinerHelper::reassociationCanBreakAddressingModePattern(
     MachineInstr &PtrAdd) {
   assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir
new file mode 100644
index 0000000000000..3ed50d3c67fd9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir
@@ -0,0 +1,178 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we can fold a G_ASHR/G_LSHR fed by a G_SHL into a G_SBFX/G_UBFX.
+
+---
+name:            apply_ashr_shl_to_sbfx
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[SBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 16
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            apply_ashr_shl_to_sbfx_lower_bound
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_lower_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[SBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 30
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            apply_ashr_shl_to_sbfx_upper_bound
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_upper_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[SBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            apply_lshr_shl_to_ubfx
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 16
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            apply_lshr_shl_to_ubfx_lower_bound
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_lower_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 30
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            apply_lshr_shl_to_ubfx_upper_bound
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_upper_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            dont_apply_pos_out_of_bounds
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: dont_apply_pos_out_of_bounds
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: $w0 = COPY [[ASHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            dont_apply_no_constant
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: dont_apply_no_constant
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: $w0 = COPY [[ASHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name:            dont_apply_more_than_one_use
+legalized: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: dont_apply_more_than_one_use
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SHL]], [[ASHR]]
+    ; CHECK: $w0 = COPY [[MUL]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 16
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    %5:_(s32) = G_MUL %3, %4
+    $w0 = COPY %5(s32)
+...

diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 5393cdc8ab5da..cd9318882c532 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -156,8 +156,7 @@ define i64 @test_rev_x_srl32_shift(i64 %a) {
 ;
 ; GISEL-LABEL: test_rev_x_srl32_shift:
 ; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    lsl x8, x0, #33
-; GISEL-NEXT:    lsr x8, x8, #35
+; GISEL-NEXT:    ubfx x8, x0, #2, #29
 ; GISEL-NEXT:    rev x8, x8
 ; GISEL-NEXT:    lsr x0, x8, #32
 ; GISEL-NEXT:    ret

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
index 85146308beb8c..a87e8f8e3a27a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -399,8 +399,7 @@ define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
+; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
 ; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
@@ -1059,8 +1058,7 @@ define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 9
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 11
+; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x150002
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX6-NEXT:    s_endpgm


        


More information about the llvm-commits mailing list