[llvm] [AArch64][GlobalISel] Lower G_BUILD_VECTOR to G_INSERT_VECTOR_ELT (PR #105686)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 09:07:14 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: None (chuongg3)
<details>
<summary>Changes</summary>
The lowering happens in post-legalizer lowering whenever any of the source registers of the G_BUILD_VECTOR are not constants.
Adds a pattern fragment that treats `scalar_to_vector ($src)` as equivalent to `vector_insert (undef), ($src), (i64 0)`.
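
For a quick picture of the transform, here is a minimal MIR sketch (register names are illustrative) mirroring the updated postlegalizer-lowering tests below: a G_BUILD_VECTOR with non-constant sources becomes a chain of G_INSERT_VECTOR_ELT into an undef vector, followed by a copy into the original destination.

```
; Before lowering: sources %a and %b are not constants.
%v:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)

; After lowering: insert each element into an undef vector in turn.
%undef:_(<2 x s32>) = G_IMPLICIT_DEF
%c0:_(s64) = G_CONSTANT i64 0
%i0:_(<2 x s32>) = G_INSERT_VECTOR_ELT %undef, %a(s32), %c0(s64)
%c1:_(s64) = G_CONSTANT i64 1
%i1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %i0, %b(s32), %c1(s64)
%v:_(<2 x s32>) = COPY %i1(<2 x s32>)
```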
---
Patch is 380.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105686.diff
64 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp (+29-3)
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+9-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+38-35)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+15)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp (+39)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll (+7-5)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir (+17-7)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir (+15-1)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir (+126-22)
- (modified) llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+57-59)
- (modified) llvm/test/CodeGen/AArch64/abs.ll (+3-5)
- (modified) llvm/test/CodeGen/AArch64/arm64-dup.ll (+41-20)
- (modified) llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll (+21-24)
- (modified) llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll (+50-99)
- (modified) llvm/test/CodeGen/AArch64/arm64-ld1.ll (+38-92)
- (modified) llvm/test/CodeGen/AArch64/arm64-neon-copy.ll (+164-154)
- (modified) llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (+99-99)
- (modified) llvm/test/CodeGen/AArch64/arm64-tbl.ll (+75-84)
- (modified) llvm/test/CodeGen/AArch64/bitcast.ll (+56-50)
- (modified) llvm/test/CodeGen/AArch64/bswap.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/concat-vector.ll (+84-94)
- (modified) llvm/test/CodeGen/AArch64/fabs.ll (+13-19)
- (modified) llvm/test/CodeGen/AArch64/faddsub.ll (+32-48)
- (modified) llvm/test/CodeGen/AArch64/fcmp.ll (+158-176)
- (modified) llvm/test/CodeGen/AArch64/fcopysign.ll (+16-18)
- (modified) llvm/test/CodeGen/AArch64/fcvt.ll (+91-133)
- (modified) llvm/test/CodeGen/AArch64/fdiv.ll (+16-24)
- (modified) llvm/test/CodeGen/AArch64/fexplog.ll (+160-155)
- (modified) llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll (+8-6)
- (modified) llvm/test/CodeGen/AArch64/fminimummaximum.ll (+32-48)
- (modified) llvm/test/CodeGen/AArch64/fminmax.ll (+32-48)
- (modified) llvm/test/CodeGen/AArch64/fmla.ll (+69-99)
- (modified) llvm/test/CodeGen/AArch64/fmul.ll (+16-24)
- (modified) llvm/test/CodeGen/AArch64/fneg.ll (+13-19)
- (modified) llvm/test/CodeGen/AArch64/fpow.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/fpowi.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/fptoi.ll (+32-38)
- (modified) llvm/test/CodeGen/AArch64/fptrunc.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/frem.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/fsincos.ll (+64-62)
- (modified) llvm/test/CodeGen/AArch64/fsqrt.ll (+13-19)
- (modified) llvm/test/CodeGen/AArch64/icmp.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/insertextract.ll (+18-29)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+82-98)
- (modified) llvm/test/CodeGen/AArch64/llvm.exp10.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/load.ll (+25-23)
- (modified) llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll (+14-20)
- (modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/neon-extadd.ll (+81-81)
- (modified) llvm/test/CodeGen/AArch64/neon-extmul.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/neon-perm.ll (+7-6)
- (modified) llvm/test/CodeGen/AArch64/ptradd.ll (+31-21)
- (modified) llvm/test/CodeGen/AArch64/sadd_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/sext.ll (+95-95)
- (modified) llvm/test/CodeGen/AArch64/shift.ll (+93-84)
- (modified) llvm/test/CodeGen/AArch64/shufflevector.ll (+43-27)
- (modified) llvm/test/CodeGen/AArch64/ssub_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/uadd_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/usub_sat_vec.ll (+20-18)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+184-184)
- (modified) llvm/test/CodeGen/AArch64/xtn.ll (+23-19)
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+75-77)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index 26752369a7711a..c44fe3bcd9cf22 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -61,15 +61,41 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset(
bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
MachineInstr &IntoMI) const {
+ auto IntoMIIter = IntoMI.getIterator();
+
// Immediate neighbours are already folded.
if (MI.getParent() == IntoMI.getParent() &&
- std::next(MI.getIterator()) == IntoMI.getIterator())
+ std::next(MI.getIterator()) == IntoMIIter)
return true;
// Convergent instructions cannot be moved in the CFG.
if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
return false;
- return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+ if (MI.isLoadFoldBarrier())
+ return false;
+
+ // If the load is simple, check instructions between MI and IntoMI
+ if (MI.mayLoad() && MI.getParent() == IntoMI.getParent()) {
+ if (MI.memoperands_empty())
+ return false;
+ auto &MMO = **(MI.memoperands_begin());
+ if (MMO.isAtomic() || MMO.isVolatile())
+ return false;
+
+ // Ensure instructions between MI and IntoMI are not affected when combined
+ unsigned Iter = 0;
+ const unsigned MaxIter = 20;
+ for (auto CurrMI = MI.getIterator(); CurrMI != IntoMIIter; ++CurrMI) {
+ if (CurrMI->isLoadFoldBarrier())
+ return false;
+
+ if (Iter++ == MaxIter)
+ return false;
+ }
+
+ return true;
+ }
+
+ return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 3f717c8a60050f..ef00e962f3870f 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -290,6 +290,13 @@ def combine_mul_cmlt : GICombineRule<
(apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
>;
+def lower_build_insert_vec_elt : GICombineRule<
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_BUILD_VECTOR):$root,
+ [{ return matchLowerBuildToInsertVecElt(*${root}, MRI); }]),
+ (apply [{ applyLowerBuildToInsertVecElt(*${root}, MRI, B); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -300,7 +307,8 @@ def AArch64PostLegalizerLowering
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
unmerge_ext_to_unmerge, lower_mull,
- vector_unmerge_lowering, insertelt_nonconst]> {
+ vector_unmerge_lowering, insertelt_nonconst,
+ lower_build_insert_vec_elt]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1053ba9242768a..b849e4c50e4fce 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3302,6 +3302,10 @@ defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
+def vec_ins_or_scal_vec : PatFrags<(ops node:$src),
+ [(vector_insert undef, node:$src, (i64 0)),
+ (scalar_to_vector node:$src)]>;
+
// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
@@ -3310,13 +3314,13 @@ multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
ValueType ScalTy, ValueType VecTy,
Instruction LOADW, Instruction LOADX,
SubRegIndex sub> {
- def : Pat<(VecTy (scalar_to_vector (ScalTy
+ def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
(loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
sub)>;
- def : Pat<(VecTy (scalar_to_vector (ScalTy
+ def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
(loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
@@ -3344,12 +3348,12 @@ defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
(load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend))))),
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
(load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
@@ -3482,34 +3486,34 @@ def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
-def : Pat <(v8i8 (scalar_to_vector (i32
+def : Pat <(v8i8 (vec_ins_or_scal_vec (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-def : Pat <(v16i8 (scalar_to_vector (i32
+def : Pat <(v16i8 (vec_ins_or_scal_vec (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-def : Pat <(v4i16 (scalar_to_vector (i32
+def : Pat <(v4i16 (vec_ins_or_scal_vec (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-def : Pat <(v8i16 (scalar_to_vector (i32
+def : Pat <(v8i16 (vec_ins_or_scal_vec (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-def : Pat <(v2i32 (scalar_to_vector (i32
+def : Pat <(v2i32 (vec_ins_or_scal_vec (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-def : Pat <(v4i32 (scalar_to_vector (i32
+def : Pat <(v4i32 (vec_ins_or_scal_vec (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-def : Pat <(v1i64 (scalar_to_vector (i64
+def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
-def : Pat <(v2i64 (scalar_to_vector (i64
+def : Pat <(v2i64 (vec_ins_or_scal_vec (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
@@ -6824,10 +6828,10 @@ def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
defm INS : SIMDIns;
-def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v16i8 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
-def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v8i8 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
@@ -6835,50 +6839,49 @@ def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
(SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;
-def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v8i16 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
-def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
+def : Pat<(v4i16 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
-def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
+def : Pat<(v2i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
(v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(i32 FPR32:$Rn), ssub))>;
-def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
+def : Pat<(v4i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(i32 FPR32:$Rn), ssub))>;
-
-def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
+def : Pat<(v2i64 (vec_ins_or_scal_vec (i64 FPR64:$Rn))),
(v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(i64 FPR64:$Rn), dsub))>;
-def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
-def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
+def : Pat<(v4f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
-def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
+def : Pat<(v2f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
-def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
+def : Pat<(v2f64 (vec_ins_or_scal_vec (f64 FPR64:$Rn))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
@@ -8550,7 +8553,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
let Predicates = [HasNEON] in {
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
SDPatternOperator ExtLoad, Instruction LD1>
- : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
+ : Pat<(ResultTy (vec_ins_or_scal_vec (i32 (ExtLoad GPR64sp:$Rn)))),
(ResultTy (EXTRACT_SUBREG
(LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
@@ -8983,11 +8986,11 @@ def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
-def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
+def : Pat<(v1i64 (vec_ins_or_scal_vec GPR64:$Xn)),
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
-def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
+def : Pat<(v1f64 (vec_ins_or_scal_vec GPR64:$Xn)),
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
-def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
+def : Pat<(v1f64 (vec_ins_or_scal_vec (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
(COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index e9e6b6cb68d0d1..18361cf3685642 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2116,6 +2116,21 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
I.getOperand(1).setReg(NewSrc.getReg(0));
return true;
}
+ case AArch64::G_INSERT_VECTOR_ELT: {
+ // Convert the type from p0 to s64 to help selection.
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
+ if (!SrcVecTy.isPointerVector())
+ return false;
+ auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
+ MRI.setType(I.getOperand(1).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setType(I.getOperand(0).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
+ I.getOperand(2).setReg(NewSrc.getReg(0));
+ return true;
+ }
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_SITOFP: {
// If both source and destination regbanks are FPR, then convert the opcode
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 4a1977ba1a00f0..d90fbaff38ce50 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1048,6 +1048,45 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+// Intend to match the last part of
+// AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG)
+bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
+ "Expected G_BUILD_VECTOR instruction");
+ bool isConstant = true;
+
+ // Check if the values are the same
+ for (unsigned i = 1; i < MI.getNumOperands(); i++) {
+ auto ConstVal =
+ getAnyConstantVRegValWithLookThrough(MI.getOperand(i).getReg(), MRI);
+ if (!ConstVal.has_value()) {
+ isConstant = false;
+ }
+ }
+
+ if (isConstant)
+ return false;
+
+ return true;
+}
+
+void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register DstReg = B.buildUndef(DstTy).getReg(0);
+
+ for (unsigned i = 1; i < MI.getNumOperands(); i++) {
+ Register SrcReg = MI.getOperand(i).getReg();
+ if (MRI.getVRegDef(SrcReg)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ continue;
+ Register IdxReg = B.buildConstant(LLT::scalar(64), i - 1).getReg(0);
+ DstReg =
+ B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
+ }
+ B.buildCopy(MI.getOperand(0).getReg(), DstReg);
+ MI.eraseFromParent();
+}
+
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &SrcReg) {
assert(MI.getOpcode() == TargetOpcode::G_STORE);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
index f7efaeaa507053..87c1307ad29556 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
@@ -10,12 +10,14 @@ define i32 @bar() {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: mov b1, v0[1]
-; CHECK-NEXT: mov b2, v0[2]
-; CHECK-NEXT: mov b3, v0[3]
-; CHECK-NEXT: mov.h v0[1], v1[0]
-; CHECK-NEXT: mov.h v2[1], v3[0]
+; CHECK-NEXT: mov b2, v0[3]
+; CHECK-NEXT: mov b3, v0[2]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: mov.h v0[1], w8
+; CHECK-NEXT: mov.h v3[1], w9
; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll.4s v1, v2, #0
+; CHECK-NEXT: ushll.4s v1, v3, #0
; CHECK-NEXT: mov.d v0[1], v1[0]
; CHECK-NEXT: movi.4s v1, #1
; CHECK-NEXT: and.16b v0, v0, v1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
index 70867c2ea2842a..0115531dfb09ae 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
@@ -42,20 +42,30 @@ body: |
; LOWER-NEXT: {{ $}}
; LOWER-NEXT: %r:_(s32) = COPY $w0
; LOWER-NEXT: %q:_(s32) = COPY $w1
- ; LOWER-NEXT: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
+ ; LOWER-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; LOWER-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; LOWER-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %r(s32), [[C]](s64)
+ ; LOWER-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; LOWER-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], %q(s32), [[C1]](s64)
+ ; LOWER-NEXT: %build_vector:_(<2 x s32>) = COPY [[IVEC1]](<2 x s32>)
; LOWER-NEXT: $d0 = COPY %build_vector(<2 x s32>)
; LOWER-NEXT: RET_ReallyLR implicit $d0
;
; SELECT-LABEL: name: dont_combine_different_reg
; SELECT: liveins: $d0, $w0, $w1
; SELECT-NEXT: {{ $}}
- ; SELECT-NEXT: %r:gpr32all = COPY $w0
+ ; SELECT-NEXT: %r:gpr32 = COPY $w0
; SELECT-NEXT: %q:gpr32 = COPY $w1
- ; SELECT-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
- ; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
- ; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q
- ; SELECT-NEXT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub
- ; SELECT-NEXT: $d0 = COPY %build_vector
+ ; SELECT-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
+ ; SELECT-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DEF]], %subreg.dsub
+ ; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 0, %r
+ ; SELECT-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
+ ; SELECT-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; SELECT-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub
+ ; SELECT-NEXT: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %q
+ ; SELECT-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr1]].dsub
+ ; SELECT-NEXT: $d0 = COPY [[COPY1]]
; SELECT-NEXT: RET_ReallyLR implicit $d0
%r:_(s32) = COPY $w0
%q:_(s32) = COPY $w1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-spl...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/105686
More information about the llvm-commits mailing list