[llvm] b309bc0 - [GlobalISel] Combine out-of-range shifts to undef.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 17 15:05:06 PST 2023
Author: Amara Emerson
Date: 2023-02-17T15:05:00-08:00
New Revision: b309bc04eebc9c736b6c34d73d520a6ef7baf302
URL: https://github.com/llvm/llvm-project/commit/b309bc04eebc9c736b6c34d73d520a6ef7baf302
DIFF: https://github.com/llvm/llvm-project/commit/b309bc04eebc9c736b6c34d73d520a6ef7baf302.diff
LOG: [GlobalISel] Combine out-of-range shifts to undef.
Differential Revision: https://reviews.llvm.org/D144303
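
In plain terms, the new combine rewrites a G_SHL/G_ASHR/G_LSHR whose constant shift amount is greater than or equal to the bit width of the result as G_IMPLICIT_DEF. A minimal before/after sketch, taken from the shl_by_ge_bw test added below (an s16 value shifted by 20):

  ; before: shift amount 20 >= 16-bit result width
  %2:_(s16) = G_CONSTANT i16 20
  %3:_(s16) = G_SHL %0, %2(s16)

  ; after the combine: the out-of-range shift is replaced with undef
  %3:_(s16) = G_IMPLICIT_DEF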
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9dc1abbcfa40e..afc29fcb72c76 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -789,6 +789,9 @@ class CombinerHelper {
/// (X ^ Y) != X -> Y != 0
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Match shifts greater or equal to the bitwidth of the operation.
+ bool matchShiftsTooBig(MachineInstr &MI);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7aed4982cda3e..e05de02f693c3 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -205,6 +205,12 @@ def ptr_add_immed_chain : GICombineRule<
[{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
(apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
+def shifts_too_big : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchShiftsTooBig(*${root}); }]),
+ (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
+
// Fold shift (shift base x), y -> shift base, (x+y), if shifts are same
def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;
def shift_immed_chain : GICombineRule<
@@ -1089,7 +1095,7 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
- simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+ simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 81863c1edd6b2..d83bf24e4db11 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6194,6 +6194,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
return CmpInst::isEquality(Pred) && Y.isValid();
}
+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+ Register ShiftReg = MI.getOperand(2).getReg();
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+ auto IsShiftTooBig = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && CI->uge(ResTy.getScalarSizeInBits());
+ };
+ return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir
new file mode 100644
index 0000000000000..d4dc24741527b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir
@@ -0,0 +1,132 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: shl_by_ge_bw
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: shl_by_ge_bw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 20
+ %3:_(s16) = G_SHL %0, %2(s16)
+ %4:_(s32) = G_ANYEXT %3(s16)
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: lshr_by_ge_bw
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: lshr_by_ge_bw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 16
+ %3:_(s16) = G_LSHR %0, %2(s16)
+ %4:_(s32) = G_ANYEXT %3(s16)
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: ashr_by_ge_bw
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: ashr_by_ge_bw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 20
+ %3:_(s16) = G_ASHR %0, %2(s16)
+ %4:_(s32) = G_ANYEXT %3(s16)
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: shl_by_ge_bw_vector
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: shl_by_ge_bw_vector
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %shl:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $q0 = COPY %shl(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<4 x s32>) = COPY $q0
+ %0:_(s32) = G_CONSTANT i32 32
+ %bv:_(<4 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0
+ %shl:_(<4 x s32>) = G_SHL %1, %bv(<4 x s32>)
+ $q0 = COPY %shl(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: shl_by_ge_bw_vector_partial
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: shl_by_ge_bw_vector_partial
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: %small:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), %small(s32)
+ ; CHECK-NEXT: %shl:_(<4 x s32>) = G_SHL [[COPY]], %bv(<4 x s32>)
+ ; CHECK-NEXT: $q0 = COPY %shl(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<4 x s32>) = COPY $q0
+ %0:_(s32) = G_CONSTANT i32 32
+ %small:_(s32) = G_CONSTANT i32 4
+ %bv:_(<4 x s32>) = G_BUILD_VECTOR %0, %0, %0, %small
+ %shl:_(<4 x s32>) = G_SHL %1, %bv(<4 x s32>)
+ $q0 = COPY %shl(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
index 985c23294221d..7893bfa1d38f0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
@@ -261,8 +261,9 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg
+ ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
%trunc:_(s16) = G_TRUNC %arg
@@ -283,8 +284,9 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %arg
+ ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
%trunc:_(s16) = G_TRUNC %arg
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir
index 030e30c90beb2..1eb0b7de0692e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir
@@ -121,10 +121,8 @@ body: |
; CHECK-LABEL: name: narrow_ashr_s64_64
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 64
%2:_(s64) = G_ASHR %0, %1
@@ -141,10 +139,8 @@ body: |
; CHECK-LABEL: name: narrow_ashr_s64_65
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 65
%2:_(s64) = G_ASHR %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir
index 6f5a4b6a6bc4d..d649a8ad58b43 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir
@@ -119,10 +119,8 @@ body: |
; CHECK-LABEL: name: narrow_lshr_s64_64
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 64
%2:_(s64) = G_LSHR %0, %1
@@ -139,10 +137,8 @@ body: |
; CHECK-LABEL: name: narrow_lshr_s64_65
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 65
%2:_(s64) = G_LSHR %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir
index eba3fec13c736..f939742ecba61 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir
@@ -120,10 +120,8 @@ body: |
; CHECK-LABEL: name: narrow_shl_s64_64
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 64
%2:_(s64) = G_SHL %0, %1
@@ -140,10 +138,8 @@ body: |
; CHECK-LABEL: name: narrow_shl_s64_65
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 65
%2:_(s64) = G_SHL %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
index 104be036426fe..28eaecee81d71 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
@@ -889,7 +889,7 @@ define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) {
;
; GFX8-LABEL: s_sext_inreg_v4i16_14:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s0, 0, 0x100000
+; GFX8-NEXT: s_bfe_u32 s0, -1, 0x100000
; GFX8-NEXT: s_mov_b32 s1, s0
; GFX8-NEXT: ; return to shader part epilog
;
@@ -978,7 +978,7 @@ define <4 x float> @v_sext_inreg_v8i16_11(<8 x i16> %value) {
; GFX8-LABEL: v_sext_inreg_v8i16_11:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_bfe_u32 s4, 0, 0x100000
+; GFX8-NEXT: s_bfe_u32 s4, -1, 0x100000
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_mov_b32_e32 v2, s4