[llvm] bed8394 - [GISel]: Few InsertVecElt combines
Aditya Nandakumar via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 28 12:28:54 PDT 2020
Author: Aditya Nandakumar
Date: 2020-10-28T12:27:07-07:00
New Revision: bed83940478449b7ee08d43e5b74995912bf8206
URL: https://github.com/llvm/llvm-project/commit/bed83940478449b7ee08d43e5b74995912bf8206
DIFF: https://github.com/llvm/llvm-project/commit/bed83940478449b7ee08d43e5b74995912bf8206.diff
LOG: [GISel]: Few InsertVecElt combines
https://reviews.llvm.org/D88060
This adds the following combines:
1) build_vector formation from a chain of insert_vec_elts
2) insert_vec_elts (build_vector) -> build_vector
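Concretely, combine (1) walks a chain of constant-index G_INSERT_VECTOR_ELTs
from the last insert upward and replaces the whole chain with a single
G_BUILD_VECTOR, keeping only the latest write to each lane. A toy,
self-contained model of that folding logic (plain C++, no LLVM types; all
names here are illustrative, not from the patch):

    #include <cstdio>
    #include <optional>
    #include <vector>

    struct Insert { unsigned Idx; int Val; };

    // Fold a chain of constant-index inserts into build_vector lanes.
    // A disengaged optional means the lane was never written (undef).
    std::vector<std::optional<int>> foldChain(unsigned NumElts,
                                              const std::vector<Insert> &Chain) {
      std::vector<std::optional<int>> Lanes(NumElts);
      // Walk from the last insert back to the first, as the match loop does;
      // only the first value seen per lane (i.e. the latest write) is kept.
      for (auto It = Chain.rbegin(); It != Chain.rend(); ++It)
        if (!Lanes[It->Idx])
          Lanes[It->Idx] = It->Val;
      return Lanes;
    }

    int main() {
      // Mirrors test_combine_multiple_same_idx_2 below: lane 0 is written
      // twice, so only the second write survives; prints "11 11".
      for (auto &L : foldChain(2, {{0, 10}, {0, 11}, {1, 11}}))
        std::printf(L ? "%d " : "undef ", L.value_or(0));
      std::printf("\n");
      return 0;
    }

Walking the chain in reverse is what makes a later insert win over an
earlier one to the same lane, as exercised by the
test_combine_multiple_same_idx_* tests added below.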
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
llvm/test/CodeGen/AArch64/combine-loads.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 73585b51ead3..a0912dc7e9fa 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -431,6 +431,12 @@ class CombinerHelper {
bool matchPtrAddZero(MachineInstr &MI);
bool applyPtrAddZero(MachineInstr &MI);
+ bool matchCombineInsertVecElts(MachineInstr &MI,
+ SmallVectorImpl<Register> &MatchInfo);
+
+ bool applyCombineInsertVecElts(MachineInstr &MI,
+ SmallVectorImpl<Register> &MatchInfo);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index c1c72d0ccbb9..e337a835b4fa 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -396,6 +396,35 @@ struct CheckType {
inline CheckType m_SpecificType(LLT Ty) { return Ty; }
+template <typename Src0Ty, typename Src1Ty, typename Src2Ty, unsigned Opcode>
+struct TernaryOp_match {
+ Src0Ty Src0;
+ Src1Ty Src1;
+ Src2Ty Src2;
+
+ TernaryOp_match(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
+ : Src0(Src0), Src1(Src1), Src2(Src2) {}
+ template <typename OpTy>
+ bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
+ MachineInstr *TmpMI;
+ if (mi_match(Op, MRI, m_MInstr(TmpMI))) {
+ if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 4) {
+ return (Src0.match(MRI, TmpMI->getOperand(1).getReg()) &&
+ Src1.match(MRI, TmpMI->getOperand(2).getReg()) &&
+ Src2.match(MRI, TmpMI->getOperand(3).getReg()));
+ }
+ }
+ return false;
+ }
+};
+template <typename Src0Ty, typename Src1Ty, typename Src2Ty>
+inline TernaryOp_match<Src0Ty, Src1Ty, Src2Ty,
+ TargetOpcode::G_INSERT_VECTOR_ELT>
+m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) {
+ return TernaryOp_match<Src0Ty, Src1Ty, Src2Ty,
+ TargetOpcode::G_INSERT_VECTOR_ELT>(Src0, Src1, Src2);
+}
+
} // namespace GMIPatternMatch
} // namespace llvm
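A minimal usage sketch of the new ternary matcher (assuming a
MachineRegisterInfo MRI and a register Dst are in scope; this mirrors how
the match code in CombinerHelper.cpp below composes it with m_MInstr,
m_Reg and m_ICst):

    // Bind the vector operand's defining instruction, the inserted scalar,
    // and the constant lane index in one call; the match fails if the index
    // operand is not a constant.
    MachineInstr *SrcMI;
    Register Elt;
    int64_t Idx;
    if (mi_match(Dst, MRI,
                 m_GInsertVecElt(m_MInstr(SrcMI), m_Reg(Elt), m_ICst(Idx)))) {
      // SrcMI produced the source vector; Elt/Idx are the scalar and lane.
    }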
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9a57db1bcfb7..ec50a1b21a57 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -505,6 +505,17 @@ def ptr_add_with_zero: GICombineRule<
[{ return Helper.matchPtrAddZero(*${root}); }]),
(apply [{ return Helper.applyPtrAddZero(*${root}); }])>;
+def regs_small_vec : GIDefMatchData<"SmallVector<Register, 4>">;
+def combine_insert_vec_elts_build_vector : GICombineRule<
+ (defs root:$root, regs_small_vec:$info),
+ (match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
+ [{ return Helper.matchCombineInsertVecElts(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyCombineInsertVecElts(*${root}, ${info}); }])>;
+
+// Currently only the one combine above.
+def insert_vec_elt_combines : GICombineGroup<
+ [combine_insert_vec_elts_build_vector]>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -532,9 +543,9 @@ def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one]>;
-def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
- combines_for_extload, combine_indexed_load_store, undef_combines,
- identity_combines, simplify_add_to_sub,
+def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
+ ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store,
+ undef_combines, identity_combines, simplify_add_to_sub,
hoist_logic_op_with_same_opcode_hands,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
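TableGen stitches the rule's match/apply C++ fragments into the generated
combiner dispatch; very roughly, the new rule behaves like the following
hand-written sketch (an approximation for exposition, not the actual
generated code; the function name is made up):

    static bool tryInsertVecEltCombines(CombinerHelper &Helper,
                                        MachineInstr &MI) {
      // wip_match_opcode gate on the root instruction.
      if (MI.getOpcode() != TargetOpcode::G_INSERT_VECTOR_ELT)
        return false;
      SmallVector<Register, 4> MatchInfo; // the regs_small_vec match data
      if (!Helper.matchCombineInsertVecElts(MI, MatchInfo))
        return false;
      return Helper.applyCombineInsertVecElts(MI, MatchInfo);
    }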
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index f2209bff2072..4de17bf4545b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2431,6 +2431,67 @@ bool CombinerHelper::matchSimplifyAddToSub(
return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
+bool CombinerHelper::matchCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
+ "Invalid opcode");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
+ unsigned NumElts = DstTy.getNumElements();
+ // If this MI is part of a sequence of insert_vec_elts, then
+ // don't do the combine in the middle of the sequence.
+ if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
+ TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ MachineInstr *CurrInst = &MI;
+ MachineInstr *TmpInst;
+ int64_t IntImm;
+ Register TmpReg;
+ MatchInfo.resize(NumElts);
+ while (mi_match(
+ CurrInst->getOperand(0).getReg(), MRI,
+ m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
+ if (IntImm >= NumElts)
+ return false;
+ if (!MatchInfo[IntImm])
+ MatchInfo[IntImm] = TmpReg;
+ CurrInst = TmpInst;
+ }
+ // Variable index.
+ if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
+ if (!MatchInfo[I - 1].isValid())
+ MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
+ }
+ return true;
+ }
+ // If we didn't end in a G_IMPLICIT_DEF, bail out.
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+}
+
+bool CombinerHelper::applyCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register UndefReg;
+ auto GetUndef = [&]() {
+ if (UndefReg)
+ return UndefReg;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
+ return UndefReg;
+ };
+ for (unsigned I = 0; I < MatchInfo.size(); ++I) {
+ if (!MatchInfo[I])
+ MatchInfo[I] = GetUndef();
+ }
+ Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::applySimplifyAddToSub(
MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
Builder.setInstr(MI);
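The applyCombineInsertVecElts step above is a straight rebuild: lanes the
match never recorded get a single, lazily created scalar G_IMPLICIT_DEF, and
one G_BUILD_VECTOR is emitted over the lane registers. A condensed sketch of
the same pattern (assuming a MachineIRBuilder B, the destination DstReg, the
scalar type ScalarTy, and the matched lane registers in Regs are in scope):

    // Fill unset lanes with one shared scalar undef, created only if some
    // lane is actually missing, then emit a single G_BUILD_VECTOR.
    Register Undef;
    for (Register &R : Regs)
      if (!R) {
        if (!Undef)
          Undef = B.buildUndef(ScalarTy).getReg(0);
        R = Undef;
      }
    B.buildBuildVector(DstReg, Regs);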
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
new file mode 100644
index 000000000000..c80445a52cbb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
@@ -0,0 +1,174 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -march=aarch64 -run-pass=aarch64-prelegalizer-combiner %s | FileCheck %s
+---
+name: test_combine_consecutive
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: test_combine_consecutive
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: $x0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(<2 x s32>) = G_IMPLICIT_DEF
+ %7:_(s32) = G_CONSTANT i32 0
+ %8:_(s32) = G_CONSTANT i32 1
+ %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32)
+ %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s32)
+ $x0 = COPY %4
+...
+---
+name: test_combine_diff_order
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: test_combine_diff_order
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY]](s32)
+ ; CHECK: $x0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(<2 x s32>) = G_IMPLICIT_DEF
+ %7:_(s32) = G_CONSTANT i32 1
+ %8:_(s32) = G_CONSTANT i32 0
+ %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32)
+ %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s32)
+ $x0 = COPY %4
+...
+---
+name: test_combine_insert_vec_build_vec_idx_1
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2, $w3
+
+ ; CHECK-LABEL: name: test_combine_insert_vec_build_vec_idx_1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %6:_(s32) = COPY $w2
+ %7:_(s32) = COPY $w3
+ %2:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %6, %7
+ %3:_(s32) = G_CONSTANT i32 1
+ %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+ $q0 = COPY %4
+...
+---
+name: test_combine_insert_vec_build_vec_idx_oob
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2, $w3
+
+ ; CHECK-LABEL: name: test_combine_insert_vec_build_vec_idx_oob
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[COPY]](s32), [[C]](s32)
+ ; CHECK: $q0 = COPY [[IVEC]](<4 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %6:_(s32) = COPY $w2
+ %7:_(s32) = COPY $w3
+ %2:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %6, %7
+ %3:_(s32) = G_CONSTANT i32 4
+ %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+ $q0 = COPY %4
+...
+---
+name: test_combine_insert_vec_build_vec_variable
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2, $w3
+
+ ; CHECK-LABEL: name: test_combine_insert_vec_build_vec_variable
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[COPY]](s32), [[COPY]](s32)
+ ; CHECK: $q0 = COPY [[IVEC]](<4 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %6:_(s32) = COPY $w2
+ %7:_(s32) = COPY $w3
+ %2:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %6, %7
+ %3:_(s32) = COPY %0
+ %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+ $q0 = COPY %4
+...
+---
+name: test_combine_multiple_same_idx_1
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: test_combine_multiple_same_idx_1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: $x0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(<2 x s32>) = G_IMPLICIT_DEF
+ %7:_(s32) = G_CONSTANT i32 0
+ %8:_(s32) = G_CONSTANT i32 1
+ %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32)
+ %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s32)
+ %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %4, %1(s32), %8(s32)
+ $x0 = COPY %5
+...
+---
+name: test_combine_multiple_same_idx_2
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: test_combine_multiple_same_idx_2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32)
+ ; CHECK: $x0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(<2 x s32>) = G_IMPLICIT_DEF
+ %7:_(s32) = G_CONSTANT i32 0
+ %8:_(s32) = G_CONSTANT i32 1
+ %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32)
+ %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %7(s32)
+ %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %4, %1(s32), %8(s32)
+ $x0 = COPY %5
+...
+---
+name: test_combine_missing_idx
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: test_combine_missing_idx
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32), [[COPY1]](s32), [[COPY]](s32)
+ ; CHECK: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(<4 x s32>) = G_IMPLICIT_DEF
+ %7:_(s32) = G_CONSTANT i32 0
+ %8:_(s32) = G_CONSTANT i32 2
+ %9:_(s32) = G_CONSTANT i32 3
+ %10:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32)
+ %11:_(<4 x s32>) = G_INSERT_VECTOR_ELT %10, %1(s32), %8(s32)
+ %12:_(<4 x s32>) = G_INSERT_VECTOR_ELT %11, %0(s32), %9(s32)
+ $q0 = COPY %12
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
index f2b5b67222d4..a6f231f81853 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
@@ -119,22 +119,15 @@ name: concat_to_build_vector_negative_test
tracksRegLiveness: true
body: |
bb.1:
- liveins: $x0, $x1
+ liveins: $q0
; CHECK-LABEL: name: concat_to_build_vector_negative_test
- ; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
- ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[IVEC]](<2 x s64>), [[DEF1]](<2 x s64>)
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[DEF]](<2 x s64>)
; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s64>)
- %0:_(s64) = COPY $x0
- %1:_(s64) = COPY $x1
- %2:_(<2 x s64>) = G_IMPLICIT_DEF
- %3:_(s32) = G_CONSTANT i32 1
- %4:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+ %4:_(<2 x s64>) = COPY $q0
%5:_(<2 x s64>) = G_IMPLICIT_DEF
%6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
RET_ReallyLR implicit %6
diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll
index 2e88c3c82424..c7275edc532f 100644
--- a/llvm/test/CodeGen/AArch64/combine-loads.ll
+++ b/llvm/test/CodeGen/AArch64/combine-loads.ll
@@ -4,10 +4,10 @@
define <2 x i64> @z(i64* nocapture nonnull readonly %p) {
; CHECK-LABEL: z:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: ldr x8, [x0, #8]
-; CHECK-NEXT: mov v0.d[0], x9
+; CHECK-NEXT: // implicit-def: $q0
+; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: ret
%b = load i64, i64* %p
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index cdb303f5ec3b..a51626c9c2ea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -3385,34 +3385,34 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s6, 0xffffff
-; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: v_and_b32_e32 v0, s6, v0
-; CGP-NEXT: v_and_b32_e32 v2, s6, v2
-; CGP-NEXT: v_and_b32_e32 v3, s6, v4
-; CGP-NEXT: v_and_b32_e32 v4, s6, v6
+; CGP-NEXT: v_and_b32_e32 v1, s6, v2
+; CGP-NEXT: v_and_b32_e32 v2, s6, v4
+; CGP-NEXT: v_and_b32_e32 v3, s6, v6
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0
-; CGP-NEXT: v_cvt_f32_u32_e32 v3, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v2
-; CGP-NEXT: v_cvt_f32_u32_e32 v4, v4
+; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1
+; CGP-NEXT: v_cvt_f32_u32_e32 v3, v3
+; CGP-NEXT: v_rcp_f32_e32 v4, v2
; CGP-NEXT: v_rcp_f32_e32 v5, v3
-; CGP-NEXT: v_rcp_f32_e32 v6, v4
-; CGP-NEXT: v_mul_f32_e32 v5, v0, v5
-; CGP-NEXT: v_mul_f32_e32 v6, v2, v6
+; CGP-NEXT: v_mul_f32_e32 v4, v0, v4
+; CGP-NEXT: v_mul_f32_e32 v5, v1, v5
+; CGP-NEXT: v_trunc_f32_e32 v4, v4
; CGP-NEXT: v_trunc_f32_e32 v5, v5
-; CGP-NEXT: v_trunc_f32_e32 v6, v6
-; CGP-NEXT: v_mad_f32 v0, -v5, v3, v0
+; CGP-NEXT: v_mad_f32 v0, -v4, v2, v0
+; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT: v_mad_f32 v1, -v5, v3, v1
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_mad_f32 v2, -v6, v4, v2
-; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v3
+; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v4
-; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, v3
+; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1
; CGP-NEXT: v_and_b32_e32 v0, s6, v0
-; CGP-NEXT: v_and_b32_e32 v2, s6, v2
-; CGP-NEXT: v_mov_b32_e32 v3, v1
+; CGP-NEXT: v_and_b32_e32 v2, s6, v1
+; CGP-NEXT: v_mov_b32_e32 v1, 0
+; CGP-NEXT: v_mov_b32_e32 v3, 0
; CGP-NEXT: s_setpc_b64 s[30:31]
%num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
%den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 11becb06a9d6..2e1292d9dc65 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -3336,38 +3336,38 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s6, 0xffffff
-; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: v_and_b32_e32 v0, s6, v0
-; CGP-NEXT: v_and_b32_e32 v2, s6, v2
-; CGP-NEXT: v_and_b32_e32 v3, s6, v4
-; CGP-NEXT: v_and_b32_e32 v4, s6, v6
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, v0
-; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
-; CGP-NEXT: v_cvt_f32_u32_e32 v7, v2
-; CGP-NEXT: v_cvt_f32_u32_e32 v8, v4
-; CGP-NEXT: v_rcp_f32_e32 v9, v6
-; CGP-NEXT: v_rcp_f32_e32 v10, v8
-; CGP-NEXT: v_mul_f32_e32 v9, v5, v9
-; CGP-NEXT: v_mul_f32_e32 v10, v7, v10
+; CGP-NEXT: v_and_b32_e32 v1, s6, v2
+; CGP-NEXT: v_and_b32_e32 v2, s6, v4
+; CGP-NEXT: v_and_b32_e32 v3, s6, v6
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, v0
+; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
+; CGP-NEXT: v_cvt_f32_u32_e32 v6, v1
+; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
+; CGP-NEXT: v_rcp_f32_e32 v8, v5
+; CGP-NEXT: v_rcp_f32_e32 v9, v7
+; CGP-NEXT: v_mul_f32_e32 v8, v4, v8
+; CGP-NEXT: v_mul_f32_e32 v9, v6, v9
+; CGP-NEXT: v_trunc_f32_e32 v8, v8
; CGP-NEXT: v_trunc_f32_e32 v9, v9
-; CGP-NEXT: v_trunc_f32_e32 v10, v10
-; CGP-NEXT: v_mad_f32 v5, -v9, v6, v5
+; CGP-NEXT: v_mad_f32 v4, -v8, v5, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT: v_mad_f32 v6, -v9, v7, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9
-; CGP-NEXT: v_mad_f32 v7, -v10, v8, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10
-; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v5|, v6
+; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v4|, v5
+; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v6|, v7
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
-; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v7|, v8
-; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6
+; CGP-NEXT: v_mul_lo_u32 v2, v4, v2
; CGP-NEXT: v_mul_lo_u32 v3, v5, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v6, v4
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_and_b32_e32 v0, s6, v0
-; CGP-NEXT: v_and_b32_e32 v2, s6, v2
-; CGP-NEXT: v_mov_b32_e32 v3, v1
+; CGP-NEXT: v_and_b32_e32 v2, s6, v1
+; CGP-NEXT: v_mov_b32_e32 v1, 0
+; CGP-NEXT: v_mov_b32_e32 v3, 0
; CGP-NEXT: s_setpc_b64 s[30:31]
%num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
%den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index 9139cd029add..0ba151d33bff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -154,15 +154,15 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xnor_i64_mul_use:
; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s4, s0
-; GCN-NEXT: s_mov_b32 s5, s1
-; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
-; GCN-NEXT: s_not_b64 s[0:1], s[2:3]
-; GCN-NEXT: s_add_u32 s2, s2, s4
-; GCN-NEXT: s_cselect_b32 s4, 1, 0
-; GCN-NEXT: s_and_b32 s4, s4, 1
-; GCN-NEXT: s_cmp_lg_u32 s4, 0
-; GCN-NEXT: s_addc_u32 s3, s3, s5
+; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
+; GCN-NEXT: s_not_b64 s[4:5], s[2:3]
+; GCN-NEXT: s_add_u32 s2, s2, s0
+; GCN-NEXT: s_cselect_b32 s0, 1, 0
+; GCN-NEXT: s_and_b32 s0, s0, 1
+; GCN-NEXT: s_cmp_lg_u32 s0, 0
+; GCN-NEXT: s_addc_u32 s3, s3, s1
+; GCN-NEXT: s_mov_b32 s0, s4
+; GCN-NEXT: s_mov_b32 s1, s5
; GCN-NEXT: ; return to shader part epilog
%xor = xor i64 %a, %b
%r0.val = xor i64 %xor, -1