[llvm] 653beae - [AMDGPU][GISel] Add Identity BUILD_VECTOR Combines
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 30 07:07:21 PDT 2022
Author: Pierre van Houtryve
Date: 2022-09-30T14:07:13Z
New Revision: 653beae5a16816a6ef4b699803fd0fc54b8bbbb8
URL: https://github.com/llvm/llvm-project/commit/653beae5a16816a6ef4b699803fd0fc54b8bbbb8
DIFF: https://github.com/llvm/llvm-project/commit/653beae5a16816a6ef4b699803fd0fc54b8bbbb8.diff
LOG: [AMDGPU][GISel] Add Identity BUILD_VECTOR Combines
Folds away BUILD_VECTOR-related no-ops in the post-legalizer combiner.
Depends on D134433
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D134953
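
Why the shift amount must equal the destination element size: for a
<2 x s16> value bitcast to s32, the low 16 bits hold element 0 and a
G_LSHR by 16 exposes element 1 (AMDGPU is little-endian), so rebuilding
the vector from those two halves reproduces the original value bit for
bit. A standalone C++ sketch modeling the bits on the host (illustrative
only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Model %bc = G_BITCAST %x(<2 x s16>) followed by
    // G_BUILD_VECTOR_TRUNC %bc, (G_LSHR %bc, 16).
    uint32_t Src = 0xBEEF1234;                      // bits of <2 x s16> x
    uint16_t Lo = static_cast<uint16_t>(Src);       // trunc of the bitcast
    uint16_t Hi = static_cast<uint16_t>(Src >> 16); // trunc of the lshr
    uint32_t Rebuilt = uint32_t(Hi) << 16 | Lo;     // the BUILD_VECTOR bits
    assert(Rebuilt == Src); // bit-identical, hence the fold to plain x
    return 0;
  }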
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index fe1585e4600d6..ad704cb51295b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -751,6 +751,9 @@ class CombinerHelper {
/// Transform G_ADD(G_SUB(y, x), x) to y.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src);
+ bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo);
+ bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo);
+
/// \returns true if it is possible to simplify a select instruction \p MI
/// to a min/max instruction of some sort.
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 1cacf96620f02..7eac0c1f17248 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -354,6 +354,17 @@ inline bind_ty<LLT> m_Type(LLT Ty) { return Ty; }
inline bind_ty<CmpInst::Predicate> m_Pred(CmpInst::Predicate &P) { return P; }
inline operand_type_match m_Pred() { return operand_type_match(); }
+struct ImplicitDefMatch {
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ MachineInstr *TmpMI;
+ if (mi_match(Reg, MRI, m_MInstr(TmpMI)))
+ return TmpMI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+ return false;
+ }
+};
+
+inline ImplicitDefMatch m_GImplicitDef() { return ImplicitDefMatch(); }
+
// Helper for matching G_FCONSTANT
inline bind_ty<const ConstantFP *> m_GFCst(const ConstantFP *&C) { return C; }
@@ -423,6 +434,19 @@ m_GAdd(const LHS &L, const RHS &R) {
return BinaryOp_match<LHS, RHS, TargetOpcode::G_ADD, true>(L, R);
}
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false>
+m_GBuildVector(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false>(L, R);
+}
+
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false>
+m_GBuildVectorTrunc(const LHS &L, const RHS &R) {
+ return BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false>(L,
+ R);
+}
+
template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, false>
m_GPtrAdd(const LHS &L, const RHS &R) {
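
For context, the two new binary matchers and m_GImplicitDef slot into the
existing MIPatternMatch combinators. A minimal sketch of how they compose
(illustrative only; the combiner plumbing around it is assumed, and the
helper name is hypothetical):

  #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

  using namespace llvm;
  using namespace llvm::MIPatternMatch;

  // Match %dst = G_BUILD_VECTOR (G_TRUNC (G_BITCAST %src)), (G_IMPLICIT_DEF)
  // and bind %src: the "undef high half" identity handled by this commit.
  static bool matchLoWithUndefHi(MachineInstr &MI,
                                 const MachineRegisterInfo &MRI,
                                 Register &Src) {
    return mi_match(MI, MRI,
                    m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Src))),
                                   m_GImplicitDef()));
  }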
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 26726787e659f..b5057646c85e4 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -932,6 +932,18 @@ def add_sub_reg: GICombineRule <
(apply [{ return Helper.replaceSingleDefInstWithReg(*${root},
${matchinfo}); }])>;
+def buildvector_identity_fold : GICombineRule<
+ (defs root:$build_vector, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_BUILD_VECTOR_TRUNC, G_BUILD_VECTOR):$build_vector,
+ [{ return Helper.matchBuildVectorIdentityFold(*${build_vector}, ${matchinfo}); }]),
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${build_vector}, ${matchinfo}); }])>;
+
+def trunc_buildvector_fold : GICombineRule<
+ (defs root:$op, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_TRUNC):$op,
+ [{ return Helper.matchTruncBuildVectorFold(*${op}, ${matchinfo}); }]),
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${matchinfo}); }])>;
+
def select_to_minmax: GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_SELECT):$root,
@@ -955,7 +967,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
binop_right_to_zero, p2i_to_i2p,
i2p_to_p2i, anyext_trunc_fold,
fneg_fneg_fold, right_identity_one,
- add_sub_reg]>;
+ add_sub_reg, buildvector_identity_fold,
+ trunc_buildvector_fold]>;
def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
overlapping_and, mulo_by_2, mulo_by_0,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ac11bdc55e005..8bbecd7b3679e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5800,6 +5800,63 @@ bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
+bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // This combine folds the following patterns:
+ //
+ // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
+ // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
+ // into
+ // x
+ // if
+ // k == sizeof(VecEltTy)/2
+ // type(x) == type(dst)
+ //
+ // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
+ // into
+ // x
+ // if
+ // type(x) == type(dst)
+
+ LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT DstEltTy = DstVecTy.getElementType();
+
+ Register Lo, Hi;
+
+ if (mi_match(
+ MI, MRI,
+ m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
+ MatchInfo = Lo;
+ return MRI.getType(MatchInfo) == DstVecTy;
+ }
+
+ Optional<ValueAndVReg> ShiftAmount;
+ const auto LoPattern = m_GBitcast(m_Reg(Lo));
+ const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
+ if (mi_match(
+ MI, MRI,
+ m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
+ m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
+ if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
+ MatchInfo = Lo;
+ return MRI.getType(MatchInfo) == DstVecTy;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
+ // if type(x) == type(G_TRUNC)
+ if (!mi_match(MI.getOperand(1).getReg(), MRI,
+ m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
+ return false;
+
+ return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
+}
+
unsigned CombinerHelper::getFPMinMaxOpcForSelect(
CmpInst::Predicate Pred, LLT DstTy,
SelectPatternNaNBehaviour VsNaNRetVal) const {
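
The new matchTruncBuildVectorFold handles the reverse direction: a G_TRUNC
of a G_BITCAST of a G_BUILD_VECTOR yields the first element when the
truncated type matches the element type, again by the little-endian layout
argument. A standalone host-side sketch of that identity (illustrative
only; assumes a little-endian host, matching AMDGPU):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    // Model %t = G_TRUNC (G_BITCAST (G_BUILD_VECTOR %x, %y)) for <2 x s32>:
    // the low 32 bits of the s64 bitcast are element 0, so %t == %x.
    uint32_t X = 0xAABBCCDD, Y = 0x11223344;
    uint32_t Vec[2] = {X, Y};                     // G_BUILD_VECTOR %x, %y
    uint64_t Cast;
    std::memcpy(&Cast, Vec, sizeof(Cast));        // G_BITCAST <2 x s32> -> s64
    uint32_t Trunc = static_cast<uint32_t>(Cast); // G_TRUNC s64 -> s32
    assert(Trunc == X); // hence the fold to plain %x
    return 0;
  }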
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
index 537de76fd6c8a..60e471d30413a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll
@@ -50,10 +50,6 @@ define amdgpu_vs float @test_f16_f32_add_ext_mul_rhs(half inreg %x, half inreg %
define amdgpu_vs <5 x float> @test_5xf16_5xf32_add_ext_mul(<5 x half> inreg %x, <5 x half> inreg %y, <5 x float> inreg %z) {
; GFX9-FAST-DENORM-LABEL: test_5xf16_5xf32_add_ext_mul:
; GFX9-FAST-DENORM: ; %bb.0: ; %.entry
-; GFX9-FAST-DENORM-NEXT: s_pack_lh_b32_b16 s3, s3, s3
-; GFX9-FAST-DENORM-NEXT: s_pack_lh_b32_b16 s4, s4, s4
-; GFX9-FAST-DENORM-NEXT: s_pack_lh_b32_b16 s0, s0, s0
-; GFX9-FAST-DENORM-NEXT: s_pack_lh_b32_b16 s1, s1, s1
; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s3
; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s4
; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
index 8a29a96cb6b30..e39c3e9d3339a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
@@ -786,177 +786,69 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-LABEL: test_3xhalf_add_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX9-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX9-NEXT: v_lshl_or_b32 v2, v7, 16, v2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: v_lshl_or_b32 v3, v8, 16, v3
-; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX9-NEXT: v_pk_add_f16 v0, v3, v0
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_pk_add_f16 v0, v4, v0
; GFX9-NEXT: v_pk_add_f16 v1, v5, v1
-; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_3xhalf_add_mul_rhs:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4
-; GFX9-CONTRACT-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-CONTRACT-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX9-CONTRACT-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX9-CONTRACT-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX9-CONTRACT-NEXT: v_lshl_or_b32 v2, v7, 16, v2
-; GFX9-CONTRACT-NEXT: v_lshl_or_b32 v4, v8, 16, v4
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-CONTRACT-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
-; GFX9-CONTRACT-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX9-DENORM-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-DENORM-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX9-DENORM-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX9-DENORM-NEXT: v_lshl_or_b32 v2, v7, 16, v2
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-DENORM-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX9-DENORM-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-DENORM-NEXT: v_lshl_or_b32 v3, v8, 16, v3
-; GFX9-DENORM-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v3, v0
-; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-DENORM-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v4, v0
; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v5, v1
-; GFX9-DENORM-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
; GFX9-UNSAFE: ; %bb.0: ; %.entry
; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
-; GFX9-UNSAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-UNSAFE-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX9-UNSAFE-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX9-UNSAFE-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX9-UNSAFE-NEXT: v_lshl_or_b32 v2, v7, 16, v2
-; GFX9-UNSAFE-NEXT: v_lshl_or_b32 v4, v8, 16, v4
; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-UNSAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
-; GFX9-UNSAFE-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_3xhalf_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX10-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX10-NEXT: v_lshl_or_b32 v2, v7, 16, v2
+; GFX10-NEXT: v_pk_add_f16 v0, v4, v0
; GFX10-NEXT: v_pk_add_f16 v1, v5, v1
-; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4
-; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v4
-; GFX10-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX10-NEXT: v_pk_add_f16 v0, v2, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4
-; GFX10-CONTRACT-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-CONTRACT-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX10-CONTRACT-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
-; GFX10-CONTRACT-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX10-CONTRACT-NEXT: v_lshl_or_b32 v2, v7, 16, v2
-; GFX10-CONTRACT-NEXT: v_lshl_or_b32 v4, v8, 16, v4
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-CONTRACT-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-CONTRACT-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX10-DENORM-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-DENORM-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX10-DENORM-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX10-DENORM-NEXT: v_lshl_or_b32 v2, v7, 16, v2
+; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0
; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v5, v1
-; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
-; GFX10-DENORM-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-DENORM-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-DENORM-NEXT: v_lshl_or_b32 v2, v2, 16, v4
-; GFX10-DENORM-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v2, v0
-; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-DENORM-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-DENORM-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v2
-; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
-; GFX10-UNSAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-UNSAFE-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX10-UNSAFE-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
-; GFX10-UNSAFE-NEXT: v_lshl_or_b32 v0, v6, 16, v0
-; GFX10-UNSAFE-NEXT: v_lshl_or_b32 v2, v7, 16, v2
-; GFX10-UNSAFE-NEXT: v_lshl_or_b32 v4, v8, 16, v4
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-UNSAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-UNSAFE-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
%a = fmul <3 x half> %x, %y
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
index 8e19cfbd5c5e7..ff508e91a075c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -2,8 +2,8 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s
define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_x:
@@ -546,45 +546,21 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: ; return to shader part epilog
;
-; GFX10-LABEL: load_1d_v3f16_xyz:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s1, s3
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s3, s5
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX10-NEXT: ; return to shader part epilog
-;
-; GFX11-LABEL: load_1d_v3f16_xyz:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_mov_b32 s0, s2
-; GFX11-NEXT: s_mov_b32 s1, s3
-; GFX11-NEXT: s_mov_b32 s2, s4
-; GFX11-NEXT: s_mov_b32 s3, s5
-; GFX11-NEXT: s_mov_b32 s4, s6
-; GFX11-NEXT: s_mov_b32 s5, s7
-; GFX11-NEXT: s_mov_b32 s6, s8
-; GFX11-NEXT: s_mov_b32 s7, s9
-; GFX11-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX10PLUS-LABEL: load_1d_v3f16_xyz:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: s_mov_b32 s0, s2
+; GFX10PLUS-NEXT: s_mov_b32 s1, s3
+; GFX10PLUS-NEXT: s_mov_b32 s2, s4
+; GFX10PLUS-NEXT: s_mov_b32 s3, s5
+; GFX10PLUS-NEXT: s_mov_b32 s4, s6
+; GFX10PLUS-NEXT: s_mov_b32 s5, s7
+; GFX10PLUS-NEXT: s_mov_b32 s6, s8
+; GFX10PLUS-NEXT: s_mov_b32 s7, s9
+; GFX10PLUS-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
+; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
+; GFX10PLUS-NEXT: ; return to shader part epilog
%v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %v
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir
new file mode 100644
index 0000000000000..33e484682eb31
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-buildvector-identities.mir
@@ -0,0 +1,157 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: v2s16_trunc_same_bitcast_lshr16
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_trunc_same_bitcast_lshr16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>)
+ %src:_(<2 x s16>) = COPY $vgpr0
+ %bitcast:_(s32) = G_BITCAST %src
+ %lshr_amount:_(s32) = G_CONSTANT i32 16
+ %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount
+ %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast, %lshr
+ $vgpr0 = COPY %root
+...
+
+---
+name: v2s16_trunc_different_bitcast_lshr16
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_trunc_different_bitcast_lshr16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>)
+ %src:_(<2 x s16>) = COPY $vgpr0
+ %bitcast0:_(s32) = G_BITCAST %src
+ %bitcast1:_(s32) = G_BITCAST %src
+ %lshr_amount:_(s32) = G_CONSTANT i32 16
+ %lshr:_(s32) = G_LSHR %bitcast1, %lshr_amount
+ %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast0, %lshr
+ $vgpr0 = COPY %root
+...
+
+---
+name: v2s16_trunc_same_bitcast_lshr8_nocombine
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_trunc_same_bitcast_lshr8_nocombine
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: %bitcast:_(s32) = G_BITCAST %src(<2 x s16>)
+ ; CHECK-NEXT: %lshr_amount:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount(s32)
+ ; CHECK-NEXT: %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast(s32), %lshr(s32)
+ ; CHECK-NEXT: $vgpr0 = COPY %root(<2 x s16>)
+ %src:_(<2 x s16>) = COPY $vgpr0
+ %bitcast:_(s32) = G_BITCAST %src
+ %lshr_amount:_(s32) = G_CONSTANT i32 8
+ %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount
+ %root:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %bitcast, %lshr
+ $vgpr0 = COPY %root
+...
+
+---
+name: v2s16_same_bitcast_lshr16
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_same_bitcast_lshr16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>)
+ %src:_(<2 x s16>) = COPY $vgpr0
+ %bitcast:_(s32) = G_BITCAST %src
+ %lshr_amount:_(s32) = G_CONSTANT i32 16
+ %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount
+ %trunclo:_(s16) = G_TRUNC %bitcast
+ %trunchi:_(s16) = G_TRUNC %lshr
+ %root:_(<2 x s16>) = G_BUILD_VECTOR %trunclo, %trunchi
+ $vgpr0 = COPY %root
+...
+
+---
+name: v2s16_same_bitcast_lshr8_nocombine
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_same_bitcast_lshr8_nocombine
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: %bitcast:_(s32) = G_BITCAST %src(<2 x s16>)
+ ; CHECK-NEXT: %lshr_amount:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount(s32)
+ ; CHECK-NEXT: %trunclo:_(s16) = G_TRUNC %bitcast(s32)
+ ; CHECK-NEXT: %trunchi:_(s16) = G_TRUNC %lshr(s32)
+ ; CHECK-NEXT: %root:_(<2 x s16>) = G_BUILD_VECTOR %trunclo(s16), %trunchi(s16)
+ ; CHECK-NEXT: $vgpr0 = COPY %root(<2 x s16>)
+ %src:_(<2 x s16>) = COPY $vgpr0
+ %bitcast:_(s32) = G_BITCAST %src
+ %lshr_amount:_(s32) = G_CONSTANT i32 8
+ %lshr:_(s32) = G_LSHR %bitcast, %lshr_amount
+ %trunclo:_(s16) = G_TRUNC %bitcast
+ %trunchi:_(s16) = G_TRUNC %lshr
+ %root:_(<2 x s16>) = G_BUILD_VECTOR %trunclo, %trunchi
+ $vgpr0 = COPY %root
+...
+
+---
+name: v2s16_undefhi
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_undefhi
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>)
+ %src:_(<2 x s16>) = COPY $vgpr0
+ %bitcast:_(s32) = G_BITCAST %src
+ %trunc:_(s16) = G_TRUNC %bitcast
+ %undef:_(s16) = G_IMPLICIT_DEF
+ %root:_(<2 x s16>) = G_BUILD_VECTOR %trunc, %undef
+ $vgpr0 = COPY %root
+...
+
+---
+name: v2s32_undefhi
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: v2s32_undefhi
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %src(<2 x s32>)
+ %src:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %bitcast:_(s64) = G_BITCAST %src
+ %trunc:_(s32) = G_TRUNC %bitcast
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %root:_(<2 x s32>) = G_BUILD_VECTOR %trunc, %undef
+ $vgpr0_vgpr1 = COPY %root
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir
new file mode 100644
index 0000000000000..c42c05453c943
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: s16_trunc_v2s16_buildvector
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s16_trunc_v2s16_buildvector
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C]], [[TRUNC]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
+ %5:_(s32) = G_BITCAST %4
+ %6:_(s16) = G_TRUNC %5
+ %7:_(s16) = G_CONSTANT i16 42
+ %8:_(s16) = G_OR %7, %6
+ %9:_(s32) = G_ZEXT %8
+ $vgpr0 = COPY %9
+...
+
+---
+name: s16_trunc_v2s32_buildvector_nofold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s16_trunc_v2s32_buildvector_nofold
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C]], [[TRUNC]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
+ %3:_(s64) = G_BITCAST %2
+ %4:_(s16) = G_TRUNC %3
+ %5:_(s16) = G_CONSTANT i16 42
+ %6:_(s16) = G_OR %5, %4
+ %7:_(s32) = G_ZEXT %6
+ $vgpr0 = COPY %7
+...
+
+---
+name: s32_trunc_v2s32_buildvector
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s32_trunc_v2s32_buildvector
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
+ %3:_(s64) = G_BITCAST %2
+ %4:_(s32) = G_TRUNC %3
+ $vgpr0 = COPY %4
+...
+
+---
+name: s32_trunc_v2s32_buildvector_multiple_users
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s32_trunc_v2s32_buildvector_multiple_users
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[EVEC]](s32)
+ ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BITCAST]](s64)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
+ %3:_(s64) = G_BITCAST %2
+ %4:_(s32) = G_TRUNC %3
+ %5:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1
+ $vgpr0 = COPY %4
+ $vgpr1 = COPY %5
+ $vgpr2_vgpr3 = COPY %3
+...