[llvm] 40e269e - [GlobalISel] Add a combine for ashr(shl x, c), c --> sext_inreg x, c'
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 18 10:42:26 PDT 2020
Author: Amara Emerson
Date: 2020-08-18T10:42:15-07:00
New Revision: 40e269ea6db9c755c27e2ee1e201a640ac085afd
URL: https://github.com/llvm/llvm-project/commit/40e269ea6db9c755c27e2ee1e201a640ac085afd
DIFF: https://github.com/llvm/llvm-project/commit/40e269ea6db9c755c27e2ee1e201a640ac085afd.diff
LOG: [GlobalISel] Add a combine for ashr(shl x, c), c --> sext_inreg x, c'
By detecting this sign-extend pattern early, we can uncover opportunities for
more optimizations.
Differential Revision: https://reviews.llvm.org/D85965
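To make the identity concrete: for an N-bit value, shifting left by C and then
arithmetic-shifting right by the same C sign-extends the value from its low
N - C bits, which is exactly what G_SEXT_INREG expresses. The following is a
minimal standalone C++ sketch of that equivalence, for illustration only and
not part of the patch; it assumes two's-complement behavior for the out-of-range
int32_t conversions and arithmetic right shift of negative values, which C++20
guarantees and mainstream compilers have long provided.

#include <cassert>
#include <cstdint>

// Sign-extend the low `Bits` bits of `V` (what G_SEXT_INREG computes),
// using the xor/subtract trick rather than a shift pair.
static int32_t sextInReg(int32_t V, unsigned Bits) {
  uint32_t Mask = (Bits < 32) ? (uint32_t{1} << Bits) - 1 : ~uint32_t{0};
  uint32_t SignBit = uint32_t{1} << (Bits - 1);
  uint32_t Low = static_cast<uint32_t>(V) & Mask;
  return static_cast<int32_t>((Low ^ SignBit) - SignBit);
}

int main() {
  const unsigned C = 24; // the shared shift amount in ashr(shl x, C), C
  for (int32_t X : {0, 1, -1, 127, -128, 0xABCDEF, INT32_MIN, INT32_MAX}) {
    // ashr (shl x, C), C on a 32-bit value...
    int32_t ShlAshr =
        static_cast<int32_t>(static_cast<uint32_t>(X) << C) >> C;
    // ...matches sext_inreg x, C' with C' = 32 - C.
    assert(ShlAshr == sextInReg(X, 32 - C));
  }
  return 0;
}

With C = 24 on a 32-bit value this is a plain byte sign-extend, which is why
the shl/ashr-by-24 pairs in the AMDGPU tests below collapse to s_sext_i32_i8,
and the shl/ashr-by-7 pairs become v_bfe_i32 ..., 0, 25.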
Added:
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e632f5fd05ec..e5f2700f6de9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -294,6 +294,12 @@ class CombinerHelper {
bool applyBuildInstructionSteps(MachineInstr &MI,
InstructionStepsMatchInfo &MatchInfo);
+ /// Match ashr (shl x, C), C -> sext_inreg x, (bitwidth - C)
+ bool matchAshrShlToSextInreg(MachineInstr &MI,
+ std::tuple<Register, int64_t> &MatchInfo);
+ bool applyAshShlToSextInreg(MachineInstr &MI,
+ std::tuple<Register, int64_t> &MatchInfo);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 043be086ff41..4e216a284088 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -251,6 +251,12 @@ m_GLShr(const LHS &L, const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_LSHR, false>(L, R);
}
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>
+m_GAShr(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>(L, R);
+}
+
// Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc
template <typename SrcTy, unsigned Opcode> struct UnaryOp_match {
SrcTy L;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9cb45e2bfc11..4647afad4185 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -284,6 +284,15 @@ def hoist_logic_op_with_same_opcode_hands: GICombineRule <
(apply [{ return Helper.applyBuildInstructionSteps(*${root}, ${info});}])
>;
+// Fold ashr (shl x, C), C -> sext_inreg x, (bitwidth - C)
+def shl_ashr_to_sext_inreg_matchinfo : GIDefMatchData<"std::tuple<Register, int64_t>">;
+def shl_ashr_to_sext_inreg : GICombineRule<
+ (defs root:$root, shl_ashr_to_sext_inreg_matchinfo:$info),
+ (match (wip_match_opcode G_ASHR): $root,
+ [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
+>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -301,4 +310,5 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>;
def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
combines_for_extload, combine_indexed_load_store, undef_combines,
identity_combines, simplify_add_to_sub,
- hoist_logic_op_with_same_opcode_hands]>;
+ hoist_logic_op_with_same_opcode_hands,
+ shl_ashr_to_sext_inreg]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b922f6988a2c..48294a07597f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1887,6 +1887,36 @@ bool CombinerHelper::applyBuildInstructionSteps(
return true;
}
+bool CombinerHelper::matchAshrShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ int64_t ShlCst, AshrCst;
+ Register Src;
+ // FIXME: detect splat constant vectors.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
+ return false;
+ if (ShlCst != AshrCst)
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
+ return false;
+ MatchInfo = {Src, ShlCst};
+ return true;
+}
+bool CombinerHelper::applyAshShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ Register Src;
+ int64_t ShiftAmt;
+ std::tie(Src, ShiftAmt) = MatchInfo;
+ unsigned Size = MRI.getType(Src).getScalarSizeInBits();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir
new file mode 100644
index 000000000000..14bda863d2c2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir
@@ -0,0 +1,90 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: ashr_shl_to_sext_inreg
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: ashr_shl_to_sext_inreg
+ ; CHECK: liveins: $w0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[TRUNC]], 8
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16)
+ ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 8
+ %3:_(s16) = G_SHL %0, %2(s16)
+ %4:_(s16) = exact G_ASHR %3, %2(s16)
+ %5:_(s32) = G_ANYEXT %4(s16)
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: different_shift_amts
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: different_shift_amts
+ ; CHECK: liveins: $w0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 12
+ ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
+ ; CHECK: [[ASHR:%[0-9]+]]:_(s16) = exact G_ASHR [[SHL]], [[C1]](s16)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
+ ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 12
+ %4:_(s16) = G_CONSTANT i16 8
+ %3:_(s16) = G_SHL %0, %2(s16)
+ %5:_(s16) = exact G_ASHR %3, %4(s16)
+ %6:_(s32) = G_ANYEXT %5(s16)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: ashr_shl_to_sext_inreg_vector
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$d0' }
+body: |
+ bb.1:
+ liveins: $d0
+ ; We don't currently support this combine for vectors; this test will need
+ ; updating when we do.
+ ; CHECK-LABEL: name: ashr_shl_to_sext_inreg_vector
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+ ; CHECK: [[SHL:%[0-9]+]]:_(<4 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK: [[ASHR:%[0-9]+]]:_(<4 x s16>) = exact G_ASHR [[SHL]], [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK: $d0 = COPY [[ASHR]](<4 x s16>)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<4 x s16>) = COPY $d0
+ %2:_(s16) = G_CONSTANT i16 8
+ %1:_(<4 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16)
+ %3:_(<4 x s16>) = G_SHL %0, %1(<4 x s16>)
+ %4:_(<4 x s16>) = exact G_ASHR %3, %1(<4 x s16>)
+ $d0 = COPY %4(<4 x s16>)
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
index e5d26476e942..f3a53fb7d22d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
@@ -674,8 +674,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000
-; GFX6-NEXT: s_lshl_b32 s0, s0, 8
-; GFX6-NEXT: s_ashr_i32 s0, s0, 8
+; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
@@ -830,8 +829,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s2, s2, s0
; GFX6-NEXT: s_bfe_i32 s0, s2, 0x80000
-; GFX6-NEXT: s_lshl_b32 s0, s0, 24
-; GFX6-NEXT: s_ashr_i32 s0, s0, 24
+; GFX6-NEXT: s_sext_i32_i8 s0, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
@@ -854,8 +852,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s2, s2, s0
; GFX6-NEXT: s_bfe_i32 s0, s2, 8
-; GFX6-NEXT: s_lshl_b32 s0, s0, 24
-; GFX6-NEXT: s_ashr_i32 s0, s0, 24
+; GFX6-NEXT: s_sext_i32_i8 s0, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
@@ -879,8 +876,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add
; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0
+; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
%load = load i8, i8 addrspace(1)* %ptr, align 1
@@ -904,8 +900,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a
; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0
+; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
%load = load i8, i8 addrspace(1)* %ptr, align 1
@@ -927,8 +922,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s0, s0, 31
-; GFX6-NEXT: s_ashr_i32 s0, s0, 31
+; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
@@ -951,8 +945,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s0, s0, 30
-; GFX6-NEXT: s_ashr_i32 s0, s0, 30
+; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10001
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
@@ -975,8 +968,7 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s0, s0, 30
-; GFX6-NEXT: s_ashr_i32 s0, s0, 30
+; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20001
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
index ab3fbc03e81d..a8098b7dd9d1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -423,8 +423,7 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s0, s0, 31
-; GFX6-NEXT: s_ashr_i32 s0, s0, 31
+; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
@@ -950,22 +949,22 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out)
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb
-; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
-; GFX6-NEXT: s_mov_b32 s6, -1
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
-; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_and_b32 s0, s0, 63
-; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0
-; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0
-; GFX6-NEXT: s_endpgm
+; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb
+; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
+; GFX6-NEXT: s_mov_b32 s6, -1
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_and_b32 s0, s0, 63
+; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: v_mov_b32_e32 v0, s0
+; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0
+; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GFX6-NEXT: s_endpgm
i32 addrspace(1)* %out1,
i32 addrspace(1)* %in) #0 {
%src = load i32, i32 addrspace(1)* %in, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index f6565fe1b6e2..db9e75dd582c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -3415,8 +3415,7 @@ define i64 @v_sdiv_i64_24bit(i64 %num, i64 %den) {
; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2|
; CGP-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0
-; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25
; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CGP-NEXT: s_setpc_b64 s[30:31]
%num.mask = and i64 %num, 16777215
@@ -3736,10 +3735,8 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v3|
; CGP-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2
-; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0
-; CGP-NEXT: v_lshlrev_b32_e32 v2, 7, v2
-; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0
-; CGP-NEXT: v_ashrrev_i32_e32 v2, 7, v2
+; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25
+; CGP-NEXT: v_bfe_i32 v2, v2, 0, 25
; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; CGP-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 06d46321a59b..7f55c7358597 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -3363,8 +3363,7 @@ define i64 @v_srem_i64_24bit(i64 %num, i64 %den) {
; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; CGP-NEXT: v_mul_lo_u32 v1, v2, v1
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0
-; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25
; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CGP-NEXT: s_setpc_b64 s[30:31]
%num.mask = and i64 %num, 16777215
@@ -3677,20 +3676,18 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_rcp_f32_e32 v5, v4
; CGP-NEXT: v_ashrrev_i32_e32 v6, 30, v6
; CGP-NEXT: v_or_b32_e32 v6, 1, v6
-; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0
+; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25
; CGP-NEXT: v_mul_f32_e32 v5, v1, v5
; CGP-NEXT: v_trunc_f32_e32 v5, v5
; CGP-NEXT: v_mad_f32 v1, -v5, v4, v1
; CGP-NEXT: v_cvt_i32_f32_e32 v5, v5
; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v4|
; CGP-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc
-; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0
; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1
; CGP-NEXT: v_mul_lo_u32 v3, v1, v3
; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT: v_lshlrev_b32_e32 v2, 7, v2
-; CGP-NEXT: v_ashrrev_i32_e32 v2, 7, v2
+; CGP-NEXT: v_bfe_i32 v2, v2, 0, 25
; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; CGP-NEXT: s_setpc_b64 s[30:31]
%num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>