[llvm] [AArch64][GlobalISel] TableGen Patterns for Lane 0 Vector Insert (PR #105689)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 09:22:33 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: None (chuongg3)
Changes
---
Patch is 291.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105689.diff
59 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp (+29-3)
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+9-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+65-35)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+15)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp (+39)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll (+7-5)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir (+14-7)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir (+15-1)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir (+126-22)
- (modified) llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll (-1)
- (modified) llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll (-1)
- (modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+7-9)
- (modified) llvm/test/CodeGen/AArch64/abs.ll (+2-5)
- (modified) llvm/test/CodeGen/AArch64/arm64-dup.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll (+21-24)
- (modified) llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll (+50-99)
- (modified) llvm/test/CodeGen/AArch64/arm64-ld1.ll (+38-92)
- (modified) llvm/test/CodeGen/AArch64/arm64-neon-copy.ll (+106-123)
- (modified) llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/arm64-tbl.ll (+75-84)
- (modified) llvm/test/CodeGen/AArch64/bitcast.ll (+30-32)
- (modified) llvm/test/CodeGen/AArch64/bswap.ll (+2-3)
- (modified) llvm/test/CodeGen/AArch64/concat-vector.ll (+66-87)
- (modified) llvm/test/CodeGen/AArch64/fabs.ll (+13-19)
- (modified) llvm/test/CodeGen/AArch64/faddsub.ll (+32-48)
- (modified) llvm/test/CodeGen/AArch64/fcmp.ll (+107-125)
- (modified) llvm/test/CodeGen/AArch64/fcopysign.ll (+14-16)
- (modified) llvm/test/CodeGen/AArch64/fcvt.ll (+91-133)
- (modified) llvm/test/CodeGen/AArch64/fdiv.ll (+16-24)
- (modified) llvm/test/CodeGen/AArch64/fexplog.ll (+160-155)
- (modified) llvm/test/CodeGen/AArch64/fminimummaximum.ll (+32-48)
- (modified) llvm/test/CodeGen/AArch64/fminmax.ll (+32-48)
- (modified) llvm/test/CodeGen/AArch64/fmla.ll (+69-99)
- (modified) llvm/test/CodeGen/AArch64/fmul.ll (+16-24)
- (modified) llvm/test/CodeGen/AArch64/fneg.ll (+13-19)
- (modified) llvm/test/CodeGen/AArch64/fpow.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/fpowi.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/fptoi.ll (+24-30)
- (modified) llvm/test/CodeGen/AArch64/fptrunc.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/frem.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/fsincos.ll (+64-62)
- (modified) llvm/test/CodeGen/AArch64/fsqrt.ll (+13-19)
- (modified) llvm/test/CodeGen/AArch64/insertextract.ll (+17-40)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+64-80)
- (modified) llvm/test/CodeGen/AArch64/llvm.exp10.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/load.ll (+25-23)
- (modified) llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll (+14-20)
- (modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/neon-extadd.ll (+22-22)
- (modified) llvm/test/CodeGen/AArch64/neon-perm.ll (+7-6)
- (modified) llvm/test/CodeGen/AArch64/ptradd.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/sadd_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/shift.ll (+90-84)
- (modified) llvm/test/CodeGen/AArch64/shufflevector.ll (+43-27)
- (modified) llvm/test/CodeGen/AArch64/ssub_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/uadd_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/usub_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/xtn.ll (+6-8)
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+6-8)
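For context on the new `vec_ins_or_scal_vec` PatFrags added to AArch64InstrInfo.td in the diff below: inserting a scalar into lane 0 of an undef vector leaves every other lane undefined, so it carries the same information as a `scalar_to_vector` node, and both forms can share the existing selection patterns (for example an INSERT_SUBREG of the scalar into an IMPLICIT_DEF vector register). A minimal illustrative IR example of that shape, not taken from the patch (the function name is made up):

```llvm
; Lane 0 of an undef vector gets %x; all other lanes stay undefined,
; which is exactly the case the lane-0 vector_insert patterns cover.
define <4 x i32> @lane0_insert(i32 %x) {
  %v = insertelement <4 x i32> undef, i32 %x, i64 0
  ret <4 x i32> %v
}
```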
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index 26752369a7711a..c44fe3bcd9cf22 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -61,15 +61,41 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset(
bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
MachineInstr &IntoMI) const {
+ auto IntoMIIter = IntoMI.getIterator();
+
// Immediate neighbours are already folded.
if (MI.getParent() == IntoMI.getParent() &&
- std::next(MI.getIterator()) == IntoMI.getIterator())
+ std::next(MI.getIterator()) == IntoMIIter)
return true;
// Convergent instructions cannot be moved in the CFG.
if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
return false;
- return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+ if (MI.isLoadFoldBarrier())
+ return false;
+
+ // If the load is simple, check instructions between MI and IntoMI
+ if (MI.mayLoad() && MI.getParent() == IntoMI.getParent()) {
+ if (MI.memoperands_empty())
+ return false;
+ auto &MMO = **(MI.memoperands_begin());
+ if (MMO.isAtomic() || MMO.isVolatile())
+ return false;
+
+ // Ensure instructions between MI and IntoMI are not affected when combined
+ unsigned Iter = 0;
+ const unsigned MaxIter = 20;
+ for (auto CurrMI = MI.getIterator(); CurrMI != IntoMIIter; ++CurrMI) {
+ if (CurrMI->isLoadFoldBarrier())
+ return false;
+
+ if (Iter++ == MaxIter)
+ return false;
+ }
+
+ return true;
+ }
+
+ return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 3f717c8a60050f..ef00e962f3870f 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -290,6 +290,13 @@ def combine_mul_cmlt : GICombineRule<
(apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
>;
+def lower_build_insert_vec_elt : GICombineRule<
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_BUILD_VECTOR):$root,
+ [{ return matchLowerBuildToInsertVecElt(*${root}, MRI); }]),
+ (apply [{ applyLowerBuildToInsertVecElt(*${root}, MRI, B); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -300,7 +307,8 @@ def AArch64PostLegalizerLowering
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
unmerge_ext_to_unmerge, lower_mull,
- vector_unmerge_lowering, insertelt_nonconst]> {
+ vector_unmerge_lowering, insertelt_nonconst,
+ lower_build_insert_vec_elt]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1053ba9242768a..3ca92c5ffa6bac 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3302,6 +3302,10 @@ defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
+def vec_ins_or_scal_vec : PatFrags<(ops node:$src),
+ [(vector_insert undef, node:$src, (i64 0)),
+ (scalar_to_vector node:$src)]>;
+
// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
@@ -3310,13 +3314,13 @@ multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
ValueType ScalTy, ValueType VecTy,
Instruction LOADW, Instruction LOADX,
SubRegIndex sub> {
- def : Pat<(VecTy (scalar_to_vector (ScalTy
+ def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
(loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
sub)>;
- def : Pat<(VecTy (scalar_to_vector (ScalTy
+ def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
(loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
@@ -3344,12 +3348,12 @@ defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
(load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend))))),
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
(load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
@@ -3482,38 +3486,65 @@ def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
-def : Pat <(v8i8 (scalar_to_vector (i32
+def : Pat <(v8i8 (vec_ins_or_scal_vec (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-def : Pat <(v16i8 (scalar_to_vector (i32
+def : Pat <(v16i8 (vec_ins_or_scal_vec (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-def : Pat <(v4i16 (scalar_to_vector (i32
+def : Pat <(v4i16 (vec_ins_or_scal_vec (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-def : Pat <(v8i16 (scalar_to_vector (i32
+def : Pat <(v8i16 (vec_ins_or_scal_vec (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-def : Pat <(v2i32 (scalar_to_vector (i32
+def : Pat <(v2i32 (vec_ins_or_scal_vec (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-def : Pat <(v4i32 (scalar_to_vector (i32
+def : Pat <(v4i32 (vec_ins_or_scal_vec (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
-def : Pat <(v2i64 (scalar_to_vector (i64
+def : Pat <(v2i64 (vec_ins_or_scal_vec (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
+def : Pat<(v2i32 (vector_insert (v2i32 undef), (i32 GPR32:$Rn), (i64 0))),
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GPR32:$Rn, ssub)>;
+def : Pat<(v4i32 (vector_insert (v4i32 undef), (i32 GPR32:$Rn), (i64 0))),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GPR32:$Rn, ssub)>;
+def : Pat<(v1i64 (vector_insert (v1i64 undef), (i64 GPR64:$Rn), (i64 0))),
+ (INSERT_SUBREG (v1i64 (IMPLICIT_DEF)), GPR64:$Rn, dsub)>;
+def : Pat<(v2i64 (vector_insert (v2i64 undef), (i64 GPR64:$Rn), (i64 0))),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GPR64:$Rn, dsub)>;
+
+def : Pat<(v8i8 (vec_ins_or_scal_vec (i8 (vector_extract (v8i8 V64:$Rm), (i64 0))))),
+ (v8i8 V64:$Rm)>;
+def : Pat<(v4i16 (vec_ins_or_scal_vec (i16 (vector_extract (v4i16 V64:$Rm), (i64 0))))),
+ (v4i16 V64:$Rm)>;
+def : Pat<(v2i32 (vec_ins_or_scal_vec (i32 (vector_extract (v2i32 V64:$Rm), (i64 0))))),
+ (v2i32 V64:$Rm)>;
+def : Pat<(v1i64 (vec_ins_or_scal_vec (i32 (vector_extract (v1i64 V64:$Rm), (i64 0))))),
+ (v1i64 V64:$Rm)>;
+
+def : Pat<(v16i8 (vec_ins_or_scal_vec (i8 (vector_extract (v16i8 V128:$Rm), (i64 0))))),
+ (v16i8 V128:$Rm)>;
+def : Pat<(v8i16 (vec_ins_or_scal_vec (i16 (vector_extract (v8i16 V128:$Rm), (i64 0))))),
+ (v8i16 V128:$Rm)>;
+def : Pat<(v4i32 (vec_ins_or_scal_vec (i32 (vector_extract (v4i32 V128:$Rm), (i64 0))))),
+ (v4i32 V128:$Rm)>;
+def : Pat<(v2i64 (vec_ins_or_scal_vec (i64 (vector_extract (v2i64 V128:$Rm), (i64 0))))),
+ (v2i64 V128:$Rm)>;
+
// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
// We must use LD1 to perform vector loads in big-endian.
@@ -6824,10 +6855,10 @@ def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
defm INS : SIMDIns;
-def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v16i8 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
-def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v8i8 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
@@ -6835,50 +6866,49 @@ def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
(SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;
-def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v8i16 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
-def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v4i16 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
-def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
+def : Pat<(v2i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
(v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(i32 FPR32:$Rn), ssub))>;
-def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
+def : Pat<(v4i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(i32 FPR32:$Rn), ssub))>;
-
-def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
+def : Pat<(v2i64 (vec_ins_or_scal_vec (i64 FPR64:$Rn))),
(v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(i64 FPR64:$Rn), dsub))>;
-def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
+def : Pat<(v4f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
-def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
+def : Pat<(v2f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
-def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
+def : Pat<(v2f64 (vec_ins_or_scal_vec (f64 FPR64:$Rn))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
@@ -8550,7 +8580,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
let Predicates = [HasNEON] in {
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
SDPatternOperator ExtLoad, Instruction LD1>
- : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
+ : Pat<(ResultTy (vec_ins_or_scal_vec (i32 (ExtLoad GPR64sp:$Rn)))),
(ResultTy (EXTRACT_SUBREG
(LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
@@ -8983,11 +9013,11 @@ def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
-def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
+def : Pat<(v1i64 (vec_ins_or_scal_vec GPR64:$Xn)),
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
-def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
+def : Pat<(v1f64 (vec_ins_or_scal_vec GPR64:$Xn)),
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
-def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
+def : Pat<(v1f64 (vec_ins_or_scal_vec (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
(COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index e9e6b6cb68d0d1..18361cf3685642 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2116,6 +2116,21 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
I.getOperand(1).setReg(NewSrc.getReg(0));
return true;
}
+ case AArch64::G_INSERT_VECTOR_ELT: {
+ // Convert the type from p0 to s64 to help selection.
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
+ if (!SrcVecTy.isPointerVector())
+ return false;
+ auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
+ MRI.setType(I.getOperand(1).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setType(I.getOperand(0).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
+ I.getOperand(2).setReg(NewSrc.getReg(0));
+ return true;
+ }
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_SITOFP: {
// If both source and destination regbanks are FPR, then convert the opcode
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 4a1977ba1a00f0..d90fbaff38ce50 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1048,6 +1048,45 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+// Intended to match the last part of
+// AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG)
+bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
+ "Expected G_BUILD_VECTOR instruction");
+ bool isConstant = true;
+
+  // Check whether all of the source values are constants
+ for (unsigned i = 1; i < MI.getNumOperands(); i++) {
+ auto ConstVal =
+ getAnyConstantVRegValWithLookThrough(MI.getOperand(i).getReg(), MRI);
+ if (!ConstVal.has_value()) {
+ isConstant = false;
+ }
+ }
+
+ if (isConstant)
+ return false;
+
+ return true;
+}
+
+void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register DstReg = B.buildUndef(DstTy).getReg(0);
+
+ for (unsigned i = 1; i < MI.getNumOperands(); i++) {
+ Register SrcReg = MI.getOperand(i).getReg();
+ if (MRI.getVRegDef(SrcReg)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ continue;
+ Register IdxReg = B.buildConstant(LLT::scalar(64), i - 1).getReg(0);
+ DstReg =
+ B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
+ }
+ B.buildCopy(MI.getOperand(0).getReg(), DstReg);
+ MI.eraseFromParent();
+}
+
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &SrcReg) {
assert(MI.getOpcode() == TargetOpcode::G_STORE);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
index f7efaeaa507053..87c1307ad29556 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
@@ -10,12 +10,14 @@ define i32 @bar() {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: mov b1, v0[1]
-; CHECK-NEXT: mov b2, v0[2]
-; CHECK-NEXT: mov b3, v0[3]
-; CHECK-NEXT: mov.h v0[1], v1[0]
-; CHECK-NEXT: mov.h v2[1], v3[0]
+; CHECK-NEXT: mov b2, v0[3]
+; CHECK-NEXT: mov b3, v0[2]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: mov.h v0[1], w8
+; CHECK-NEXT: mov.h v3[1], w9
; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll.4s v1, v2, #0
+; CHECK-NEXT: ushll.4s v1, v3, #0
; CHECK-NEXT: mov.d v0[1], v1[0]
; CHECK-NEXT: movi.4s v1, #1
; CHECK-NEXT: and.16b v0, v0, v1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
index 70867c2ea2842a..22d1ccc056eb48 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
@@ -42,20 +42,27 @@ body: |
; LOWER-NEXT: {{ $}}
; LOWER-NEXT: %r:_(s32) = COPY $w0
; LOWER-NEXT: %q:_(s32) = COPY $w1
- ; LOWER-NEXT: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
+ ; LOWER-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; LOWER-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; LOWER-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %r(s32), [[C]](s64)
+ ; LOWER-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; LOWER-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], %q(s32), [[C1]](s...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/105689
More information about the llvm-commits mailing list