[llvm] [LoongArch] Support getArithmeticInstCost (PR #165187)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 22 17:35:05 PST 2025


https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/165187

>From 0c22eba06905ed33b603b2bd352bdb6fbfc27a94 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Mon, 27 Oct 2025 10:26:15 +0800
Subject: [PATCH 1/8] Support getArithmeticInstCost

---
 .../LoongArchTargetTransformInfo.cpp          |  327 +++++
 .../LoongArch/LoongArchTargetTransformInfo.h  |    6 +
 .../Analysis/CostModel/LoongArch/arith-fp.ll  |  135 ++
 .../Analysis/CostModel/LoongArch/arith-int.ll | 1161 +++++++++++++++++
 4 files changed, 1629 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-int.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 5107c8def3799..484b56cd73236 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -14,11 +14,47 @@
 //===----------------------------------------------------------------------===//
 
 #include "LoongArchTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CostTable.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/Support/InstructionCost.h"
+#include <optional>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "loongarchtti"
 
+namespace {
+// File-local to avoid ODR clashes. Init order: {Lat, RThru, Size, SizeLat}.
+struct CostKindCosts {
+  unsigned LatencyCost = ~0U;
+  unsigned RecipThroughputCost = ~0U;
+  unsigned CodeSizeCost = ~0U;
+  unsigned SizeAndLatencyCost = ~0U;
+  std::optional<unsigned>
+  operator[](TargetTransformInfo::TargetCostKind Kind) const {
+    unsigned Cost = ~0U; // ~0U is the "no cost recorded" sentinel.
+    switch (Kind) {
+    case TargetTransformInfo::TCK_Latency:
+      Cost = LatencyCost;
+      break;
+    case TargetTransformInfo::TCK_RecipThroughput:
+      Cost = RecipThroughputCost;
+      break;
+    case TargetTransformInfo::TCK_CodeSize:
+      Cost = CodeSizeCost;
+      break;
+    case TargetTransformInfo::TCK_SizeAndLatency:
+      Cost = SizeAndLatencyCost;
+      break;
+    }
+    return Cost == ~0U ? std::nullopt : std::optional<unsigned>(Cost);
+  }
+};
+} // namespace
+using CostKindTblEntry = CostTblEntryT<CostKindCosts>;
+using TypeConversionCostTblEntry = TypeConversionCostTblEntryT<CostKindCosts>;
+
 TypeSize LoongArchTTIImpl::getRegisterBitWidth(
     TargetTransformInfo::RegisterKind K) const {
   TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K);
@@ -133,3 +169,294 @@ LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 
   return Options;
 }
+
+InstructionCost LoongArchTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
+    ArrayRef<const Value *> Args, const Instruction *CxtI) const {
+  // Costs come from per-feature tables of {Latency, RecipThroughput}, scaled
+  // by the type legalization cost; anything unlisted falls back to BaseT.
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+  // Multiply by a constant power of two is costed as a shift.
+  // Multiply by a negated power of two is costed as a shift plus a subtract.
+  if (ISD == ISD::MUL && Op2Info.isConstant() &&
+      (Op2Info.isPowerOf2() || Op2Info.isNegatedPowerOf2())) {
+    InstructionCost Cost =
+        getArithmeticInstrCost(Instruction::Shl, Ty, CostKind,
+                               Op1Info.getNoProps(), Op2Info.getNoProps());
+    if (Op2Info.isNegatedPowerOf2())
+      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
+    return Cost;
+  }
+
+  // On LoongArch, signed division by a constant power of two is normally
+  // expanded to the sequence SRA + SRL + ADD + SRA.
+  // The OperandValue properties may not be the same as that of the previous
+  // operation; conservatively assume OP_None.
+  if ((ISD == ISD::SDIV || ISD == ISD::SREM) && Op2Info.isConstant() &&
+      Op2Info.isPowerOf2()) {
+    InstructionCost Cost =
+        2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+    Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+    Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+
+    if (ISD == ISD::SREM) {
+      // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
+      Cost +=
+          getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                 Op1Info.getNoProps(), Op2Info.getNoProps());
+      Cost +=
+          getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
+                                 Op1Info.getNoProps(), Op2Info.getNoProps());
+    }
+
+    return Cost;
+  }
+  // Unsigned div/rem by a constant power of two is costed as a shift/mask.
+  if ((ISD == ISD::UDIV || ISD == ISD::UREM) && Op2Info.isConstant() &&
+      Op2Info.isPowerOf2()) {
+    if (ISD == ISD::UDIV)
+      return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
+                                    Op1Info.getNoProps(), Op2Info.getNoProps());
+    // UREM
+    return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
+                                  Op1Info.getNoProps(), Op2Info.getNoProps());
+  }
+
+  static const CostKindTblEntry LSXCostTable[]{
+      {ISD::ADD, MVT::v16i8, {1, 1}}, // vaddi.b/vadd.b
+      {ISD::ADD, MVT::v8i16, {1, 1}}, // vaddi.h/vadd.h
+      {ISD::ADD, MVT::v4i32, {1, 1}}, // vaddi.w/vadd.w
+      {ISD::ADD, MVT::v2i64, {1, 1}}, // vaddi.d/vadd.d
+
+      {ISD::SUB, MVT::v16i8, {1, 1}}, // vsubi.b/vsub.b
+      {ISD::SUB, MVT::v8i16, {1, 1}}, // vsubi.h/vsub.h
+      {ISD::SUB, MVT::v4i32, {1, 1}}, // vsubi.w/vsub.w
+      {ISD::SUB, MVT::v2i64, {1, 1}}, // vsubi.d/vsub.d
+
+      {ISD::MUL, MVT::v16i8, {4, 2}}, // vmul.b
+      {ISD::MUL, MVT::v8i16, {4, 2}}, // vmul.h
+      {ISD::MUL, MVT::v4i32, {4, 2}}, // vmul.w
+      {ISD::MUL, MVT::v2i64, {4, 2}}, // vmul.d
+
+      {ISD::SDIV, MVT::v16i8, {38, 76}}, // vdiv.b
+      {ISD::SDIV, MVT::v8i16, {24, 44}}, // vdiv.h
+      {ISD::SDIV, MVT::v4i32, {17, 28}}, // vdiv.w
+      {ISD::SDIV, MVT::v2i64, {14, 19}}, // vdiv.d
+
+      {ISD::UDIV, MVT::v16i8, {38, 80}}, // vdiv.bu
+      {ISD::UDIV, MVT::v8i16, {24, 44}}, // vdiv.hu
+      {ISD::UDIV, MVT::v4i32, {17, 28}}, // vdiv.wu
+      {ISD::UDIV, MVT::v2i64, {14, 19}}, // vdiv.du
+
+      {ISD::SREM, MVT::v16i8, {38, 76}}, // vmod.b
+      {ISD::SREM, MVT::v8i16, {24, 44}}, // vmod.h
+      {ISD::SREM, MVT::v4i32, {17, 27}}, // vmod.w
+      {ISD::SREM, MVT::v2i64, {14, 19}}, // vmod.d
+
+      {ISD::UREM, MVT::v16i8, {38, 80}}, // vmod.bu
+      {ISD::UREM, MVT::v8i16, {24, 44}}, // vmod.hu
+      {ISD::UREM, MVT::v4i32, {17, 28}}, // vmod.wu
+      {ISD::UREM, MVT::v2i64, {14, 19}}, // vmod.du
+
+      {ISD::SHL, MVT::v16i8, {1, 1}}, // vslli.b/vsll.b
+      {ISD::SHL, MVT::v8i16, {1, 1}}, // vslli.h/vsll.h
+      {ISD::SHL, MVT::v4i32, {1, 1}}, // vslli.w/vsll.w
+      {ISD::SHL, MVT::v2i64, {1, 1}}, // vslli.d/vsll.d
+
+      {ISD::SRL, MVT::v16i8, {1, 1}}, // vsrli.b/vsrl.b
+      {ISD::SRL, MVT::v8i16, {1, 1}}, // vsrli.h/vsrl.h
+      {ISD::SRL, MVT::v4i32, {1, 1}}, // vsrli.w/vsrl.w
+      {ISD::SRL, MVT::v2i64, {1, 1}}, // vsrli.d/vsrl.d
+
+      {ISD::SRA, MVT::v16i8, {1, 1}}, // vsrai.b/vsra.b
+      {ISD::SRA, MVT::v8i16, {1, 1}}, // vsrai.h/vsra.h
+      {ISD::SRA, MVT::v4i32, {1, 1}}, // vsrai.w/vsra.w
+      {ISD::SRA, MVT::v2i64, {1, 1}}, // vsrai.d/vsra.d
+
+      {ISD::AND, MVT::v16i8, {1, 1}}, // vand.b/vand.v
+      {ISD::AND, MVT::v8i16, {1, 1}}, // vand.v
+      {ISD::AND, MVT::v4i32, {1, 1}}, // vand.v
+      {ISD::AND, MVT::v2i64, {1, 1}}, // vand.v
+
+      {ISD::OR, MVT::v16i8, {1, 1}}, // vori.b/vor.v
+      {ISD::OR, MVT::v8i16, {1, 1}}, // vor.v
+      {ISD::OR, MVT::v4i32, {1, 1}}, // vor.v
+      {ISD::OR, MVT::v2i64, {1, 1}}, // vor.v
+
+      {ISD::XOR, MVT::v16i8, {1, 1}}, // vxori.b/vxor.v
+      {ISD::XOR, MVT::v8i16, {1, 1}}, // vxor.v
+      {ISD::XOR, MVT::v4i32, {1, 1}}, // vxor.v
+      {ISD::XOR, MVT::v2i64, {1, 1}}, // vxor.v
+
+      {ISD::FADD, MVT::v4f32, {3, 1}}, // vfadd.s
+      {ISD::FADD, MVT::v2f64, {3, 1}}, // vfadd.d
+
+      {ISD::FSUB, MVT::v4f32, {3, 1}}, // vfsub.s
+      {ISD::FSUB, MVT::v2f64, {3, 1}}, // vfsub.d
+
+      {ISD::FMUL, MVT::v4f32, {5, 2}}, // vfmul.s
+      {ISD::FMUL, MVT::v2f64, {5, 2}}, // vfmul.d
+
+      {ISD::FDIV, MVT::v4f32, {16, 26}}, // vfdiv.s
+      {ISD::FDIV, MVT::v2f64, {12, 18}}, // vfdiv.d
+  };
+
+  if (ST->hasExtLSX()) {
+    if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+  }
+
+  static const CostKindTblEntry LASXCostTable[]{
+      {ISD::ADD, MVT::v32i8, {1, 1}},  // xvaddi.b/xvadd.b
+      {ISD::ADD, MVT::v16i16, {1, 1}}, // xvaddi.h/xvadd.h
+      {ISD::ADD, MVT::v8i32, {1, 1}},  // xvaddi.w/xvadd.w
+      {ISD::ADD, MVT::v4i64, {1, 1}},  // xvaddi.d/xvadd.d
+
+      {ISD::SUB, MVT::v32i8, {1, 1}},  // xvsubi.b/xvsub.b
+      {ISD::SUB, MVT::v16i16, {1, 1}}, // xvsubi.h/xvsub.h
+      {ISD::SUB, MVT::v8i32, {1, 1}},  // xvsubi.w/xvsub.w
+      {ISD::SUB, MVT::v4i64, {1, 1}},  // xvsubi.d/xvsub.d
+
+      {ISD::MUL, MVT::v32i8, {4, 2}},  // xvmul.b
+      {ISD::MUL, MVT::v16i16, {4, 2}}, // xvmul.h
+      {ISD::MUL, MVT::v8i32, {4, 2}},  // xvmul.w
+      {ISD::MUL, MVT::v4i64, {4, 2}},  // xvmul.d
+
+      {ISD::SDIV, MVT::v32i8, {38, 76}},  // xvdiv.b
+      {ISD::SDIV, MVT::v16i16, {24, 43}}, // xvdiv.h
+      {ISD::SDIV, MVT::v8i32, {17, 28}},  // xvdiv.w
+      {ISD::SDIV, MVT::v4i64, {14, 19}},  // xvdiv.d
+
+      {ISD::UDIV, MVT::v32i8, {38, 76}},  // xvdiv.bu
+      {ISD::UDIV, MVT::v16i16, {24, 43}}, // xvdiv.hu
+      {ISD::UDIV, MVT::v8i32, {17, 28}},  // xvdiv.wu
+      {ISD::UDIV, MVT::v4i64, {14, 19}},  // xvdiv.du
+
+      {ISD::SREM, MVT::v32i8, {38, 76}},  // xvmod.b
+      {ISD::SREM, MVT::v16i16, {24, 44}}, // xvmod.h
+      {ISD::SREM, MVT::v8i32, {17, 28}},  // xvmod.w
+      {ISD::SREM, MVT::v4i64, {14, 19}},  // xvmod.d
+
+      {ISD::UREM, MVT::v32i8, {38, 76}},  // xvmod.bu
+      {ISD::UREM, MVT::v16i16, {24, 43}}, // xvmod.hu
+      {ISD::UREM, MVT::v8i32, {17, 28}},  // xvmod.wu
+      {ISD::UREM, MVT::v4i64, {14, 19}},  // xvmod.du
+
+      {ISD::SHL, MVT::v32i8, {1, 1}},  // xvslli.b/xvsll.b
+      {ISD::SHL, MVT::v16i16, {1, 1}}, // xvslli.h/xvsll.h
+      {ISD::SHL, MVT::v8i32, {1, 1}},  // xvslli.w/xvsll.w
+      {ISD::SHL, MVT::v4i64, {1, 1}},  // xvslli.d/xvsll.d
+
+      {ISD::SRL, MVT::v32i8, {1, 1}},  // xvsrli.b/xvsrl.b
+      {ISD::SRL, MVT::v16i16, {1, 1}}, // xvsrli.h/xvsrl.h
+      {ISD::SRL, MVT::v8i32, {1, 1}},  // xvsrli.w/xvsrl.w
+      {ISD::SRL, MVT::v4i64, {1, 1}},  // xvsrli.d/xvsrl.d
+
+      {ISD::SRA, MVT::v32i8, {1, 1}},  // xvsrai.b/xvsra.b
+      {ISD::SRA, MVT::v16i16, {1, 1}}, // xvsrai.h/xvsra.h
+      {ISD::SRA, MVT::v8i32, {1, 1}},  // xvsrai.w/xvsra.w
+      {ISD::SRA, MVT::v4i64, {1, 1}},  // xvsrai.d/xvsra.d
+
+      {ISD::AND, MVT::v32i8, {1, 1}},  // xvandi.b/xvand.v
+      {ISD::AND, MVT::v16i16, {1, 1}}, // xvand.v
+      {ISD::AND, MVT::v8i32, {1, 1}},  // xvand.v
+      {ISD::AND, MVT::v4i64, {1, 1}},  // xvand.v
+
+      {ISD::OR, MVT::v32i8, {1, 1}},  // xvori.b/xvor.v
+      {ISD::OR, MVT::v16i16, {1, 1}}, // xvor.v
+      {ISD::OR, MVT::v8i32, {1, 1}},  // xvor.v
+      {ISD::OR, MVT::v4i64, {1, 1}},  // xvor.v
+
+      {ISD::XOR, MVT::v32i8, {1, 1}},  // xvxori.b/xvxor.v
+      {ISD::XOR, MVT::v16i16, {1, 1}}, // xvxor.v
+      {ISD::XOR, MVT::v8i32, {1, 1}},  // xvxor.v
+      {ISD::XOR, MVT::v4i64, {1, 1}},  // xvxor.v
+
+      {ISD::FADD, MVT::v8f32, {3, 1}}, // xvfadd.s
+      {ISD::FADD, MVT::v4f64, {3, 1}}, // xvfadd.d
+
+      {ISD::FSUB, MVT::v8f32, {3, 1}}, // xvfsub.s
+      {ISD::FSUB, MVT::v4f64, {3, 1}}, // xvfsub.d
+
+      {ISD::FMUL, MVT::v8f32, {5, 2}}, // xvfmul.s
+      {ISD::FMUL, MVT::v4f64, {5, 2}}, // xvfmul.d
+
+      {ISD::FDIV, MVT::v8f32, {15, 26}}, // xvfdiv.s
+      {ISD::FDIV, MVT::v4f64, {12, 18}}, // xvfdiv.d
+  };
+
+  if (ST->hasExtLASX()) {
+    if (const auto *Entry = CostTableLookup(LASXCostTable, ISD, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+  }
+
+  static const CostKindTblEntry LA64CostTable[]{
+      {ISD::ADD, MVT::i64, {1, 1}}, // addi.d/add.d
+      {ISD::SUB, MVT::i64, {1, 1}}, // subi.d/sub.d
+      {ISD::MUL, MVT::i64, {4, 2}}, // mul.d
+
+      {ISD::SDIV, MVT::i64, {18, 26}}, // div.d
+      {ISD::UDIV, MVT::i64, {18, 26}}, // div.du
+      {ISD::SREM, MVT::i64, {18, 26}}, // mod.d
+      {ISD::UREM, MVT::i64, {18, 26}}, // mod.du
+
+      {ISD::SHL, MVT::i64, {1, 1}}, // slli.d/sll.d
+      {ISD::SRL, MVT::i64, {1, 1}}, // srli.d/srl.d
+      {ISD::SRA, MVT::i64, {1, 1}}, // srai.d/sra.d
+
+      {ISD::AND, MVT::i64, {1, 1}}, // andi.d/and.d
+      {ISD::OR, MVT::i64, {1, 1}},  // ori.d/or.d
+      {ISD::XOR, MVT::i64, {1, 1}}, // xori.d/xor.d
+
+      {ISD::FADD, MVT::f64, {3, 1}},  // fadd.d
+      {ISD::FSUB, MVT::f64, {3, 1}},  // fsub.d
+      {ISD::FMUL, MVT::f64, {5, 2}},  // fmul.d
+      {ISD::FDIV, MVT::f64, {12, 9}}, // fdiv.d
+  };
+
+  if (ST->is64Bit()) {
+    if (const auto *Entry = CostTableLookup(LA64CostTable, ISD, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+  }
+
+  static const CostKindTblEntry LA32CostTable[]{
+      {ISD::ADD, MVT::i32, {1, 1}}, // addi.w/add.w
+      {ISD::SUB, MVT::i32, {1, 1}}, // subi.w/sub.w
+      {ISD::MUL, MVT::i32, {4, 2}}, // mul.w
+
+      {ISD::SDIV, MVT::i32, {11, 24}}, // div.w
+      {ISD::UDIV, MVT::i32, {12, 24}}, // div.wu
+      {ISD::SREM, MVT::i32, {11, 24}}, // mod.w
+      {ISD::UREM, MVT::i32, {12, 24}}, // mod.wu
+
+      {ISD::SHL, MVT::i32, {1, 1}}, // slli.w/sll.w
+      {ISD::SRL, MVT::i32, {1, 1}}, // srli.w/srl.w
+      {ISD::SRA, MVT::i32, {1, 1}}, // srai.w/sra.w
+
+      {ISD::AND, MVT::i32, {1, 1}}, // andi.w/and.w
+      {ISD::OR, MVT::i32, {1, 1}},  // ori.w/or.w
+      {ISD::XOR, MVT::i32, {1, 1}}, // xori.w/xor.w
+
+      {ISD::FADD, MVT::f32, {3, 1}}, // fadd.s
+      {ISD::FSUB, MVT::f32, {3, 1}}, // fsub.s
+      {ISD::FMUL, MVT::f32, {5, 2}}, // fmul.s
+      {ISD::FDIV, MVT::f32, {9, 8}}, // fdiv.s
+  };
+
+  if (const auto *Entry = CostTableLookup(LA32CostTable, ISD, LT.second))
+    if (auto KindCost = Entry->Cost[CostKind])
+      return LT.first * *KindCost;
+
+  // Fall back to the default implementation.
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
+                                       Args, CxtI);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 9b479f9dc0dc5..31445bad5a799 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -54,6 +54,12 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
   bool enableWritePrefetching() const override;
 
   bool shouldExpandReduction(const IntrinsicInst *II) const override;
+  InstructionCost getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
+      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
+      ArrayRef<const Value *> Args = {},
+      const Instruction *CxtI = nullptr) const override;
 
   TTI::MemCmpExpansionOptions
   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll
new file mode 100644
index 0000000000000..6da0d72d7a2be
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @fadd() {
+; LSX-LABEL: 'fadd'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F32 = fadd float poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <2 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <4 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:6 SizeLat:1 for: %V8F32 = fadd <8 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fadd double poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <2 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:6 SizeLat:1 for: %V4F64 = fadd <4 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; LASX-LABEL: 'fadd'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F32 = fadd float poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <2 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <4 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fadd <8 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fadd double poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <2 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <4 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+   %V1F32 = fadd float poison, poison
+   %V2F32 = fadd <2 x float> poison, poison
+   %V4F32 = fadd <4 x float> poison, poison
+   %V8F32 = fadd <8 x float> poison, poison
+
+   %V1F64 = fadd double poison, poison
+   %V2F64 = fadd <2 x double> poison, poison
+   %V4F64 = fadd <4 x double> poison, poison
+
+   ret void
+}
+
+define void @fsub() {
+; LSX-LABEL: 'fsub'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F32 = fsub float poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <2 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <4 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:6 SizeLat:1 for: %V8F32 = fsub <8 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fsub double poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <2 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:6 SizeLat:1 for: %V4F64 = fsub <4 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; LASX-LABEL: 'fsub'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F32 = fsub float poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <2 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <4 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <8 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fsub double poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <2 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <4 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+   %V1F32 = fsub float poison, poison
+   %V2F32 = fsub <2 x float> poison, poison
+   %V4F32 = fsub <4 x float> poison, poison
+   %V8F32 = fsub <8 x float> poison, poison
+
+   %V1F64 = fsub double poison, poison
+   %V2F64 = fsub <2 x double> poison, poison
+   %V4F64 = fsub <4 x double> poison, poison
+
+   ret void
+}
+
+define void @fmul() {
+; LSX-LABEL: 'fmul'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V1F32 = fmul float poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F32 = fmul <2 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V4F32 = fmul <4 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:10 SizeLat:1 for: %V8F32 = fmul <8 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V1F64 = fmul double poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F64 = fmul <2 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:10 SizeLat:1 for: %V4F64 = fmul <4 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; LASX-LABEL: 'fmul'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V1F32 = fmul float poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F32 = fmul <2 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V4F32 = fmul <4 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V8F32 = fmul <8 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V1F64 = fmul double poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F64 = fmul <2 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V4F64 = fmul <4 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+   %V1F32 = fmul float poison, poison
+   %V2F32 = fmul <2 x float> poison, poison
+   %V4F32 = fmul <4 x float> poison, poison
+   %V8F32 = fmul <8 x float> poison, poison
+
+   %V1F64 = fmul double poison, poison
+   %V2F64 = fmul <2 x double> poison, poison
+   %V4F64 = fmul <4 x double> poison, poison
+
+   ret void
+}
+
+define void @fdiv() {
+; LSX-LABEL: 'fdiv'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:9 SizeLat:4 for: %V1F32 = fdiv float poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:16 SizeLat:4 for: %V2F32 = fdiv <2 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:16 SizeLat:4 for: %V4F32 = fdiv <4 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:4 Lat:32 SizeLat:4 for: %V8F32 = fdiv <8 x float> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:12 SizeLat:4 for: %V1F64 = fdiv double poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:12 SizeLat:4 for: %V2F64 = fdiv <2 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:4 Lat:24 SizeLat:4 for: %V4F64 = fdiv <4 x double> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; LASX-LABEL: 'fdiv'
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:9 SizeLat:4 for: %V1F32 = fdiv float poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:16 SizeLat:4 for: %V2F32 = fdiv <2 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:16 SizeLat:4 for: %V4F32 = fdiv <4 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:15 SizeLat:4 for: %V8F32 = fdiv <8 x float> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:12 SizeLat:4 for: %V1F64 = fdiv double poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:12 SizeLat:4 for: %V2F64 = fdiv <2 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:12 SizeLat:4 for: %V4F64 = fdiv <4 x double> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+   %V1F32 = fdiv float poison, poison
+   %V2F32 = fdiv <2 x float> poison, poison
+   %V4F32 = fdiv <4 x float> poison, poison
+   %V8F32 = fdiv <8 x float> poison, poison
+
+   %V1F64 = fdiv double poison, poison
+   %V2F64 = fdiv <2 x double> poison, poison
+   %V4F64 = fdiv <4 x double> poison, poison
+
+   ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-int.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-int.ll
new file mode 100644
index 0000000000000..fb449141526b1
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-int.ll
@@ -0,0 +1,1161 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define i32 @add() {
+; LSX-LABEL: 'add'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = add i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = add <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = add <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = add <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = add <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = add <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = add i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = add <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = add <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = add <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = add <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = add i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = add <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = add <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = add <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = add i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = add <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = add <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'add'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = add i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = add <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = add <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = add <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = add <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = add <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = add i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = add <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = add <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = add <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = add <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = add i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = add <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = add <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = add <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = add i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = add <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = add <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = add i8 poison, poison
+   %V2I8 = add <2 x i8> poison, poison
+   %V4I8 = add <4 x i8> poison, poison
+   %V8I8 = add <8 x i8> poison, poison
+   %V16I8 = add <16 x i8> poison, poison
+   %V32I8 = add <32 x i8> poison, poison
+
+   %V1I16 = add i16 poison, poison
+   %V2I16 = add <2 x i16> poison, poison
+   %V4I16 = add <4 x i16> poison, poison
+   %V8I16 = add <8 x i16> poison, poison
+   %V16I16 = add <16 x i16> poison, poison
+
+   %V1I32 = add i32 poison, poison
+   %V2I32 = add <2 x i32> poison, poison
+   %V4I32 = add <4 x i32> poison, poison
+   %V8I32 = add <8 x i32> poison, poison
+
+   %V1I64 = add i64 poison, poison
+   %V2I64 = add <2 x i64> poison, poison
+   %V4I64 = add <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+define i32 @sub() {
+; LSX-LABEL: 'sub'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = sub i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = sub <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = sub <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = sub <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = sub <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = sub <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = sub i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = sub <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = sub <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = sub <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = sub <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = sub i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = sub <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = sub <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = sub <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = sub i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = sub <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = sub <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'sub'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = sub i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = sub <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = sub <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = sub <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = sub <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = sub <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = sub i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = sub <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = sub <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = sub <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = sub <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = sub i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = sub <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = sub <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = sub <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = sub i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = sub <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = sub <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = sub i8 poison, poison
+   %V2I8 = sub <2 x i8> poison, poison
+   %V4I8 = sub <4 x i8> poison, poison
+   %V8I8 = sub <8 x i8> poison, poison
+   %V16I8 = sub <16 x i8> poison, poison
+   %V32I8 = sub <32 x i8> poison, poison
+
+   %V1I16 = sub i16 poison, poison
+   %V2I16 = sub <2 x i16> poison, poison
+   %V4I16 = sub <4 x i16> poison, poison
+   %V8I16 = sub <8 x i16> poison, poison
+   %V16I16 = sub <16 x i16> poison, poison
+
+   %V1I32 = sub i32 poison, poison
+   %V2I32 = sub <2 x i32> poison, poison
+   %V4I32 = sub <4 x i32> poison, poison
+   %V8I32 = sub <8 x i32> poison, poison
+
+   %V1I64 = sub i64 poison, poison
+   %V2I64 = sub <2 x i64> poison, poison
+   %V4I64 = sub <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+define i32 @mul() {
+; LSX-LABEL: 'mul'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I8 = mul i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I8 = mul <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V4I8 = mul <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V8I8 = mul <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V16I8 = mul <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:8 SizeLat:1 for: %V32I8 = mul <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I16 = mul i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I16 = mul <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V4I16 = mul <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V8I16 = mul <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:8 SizeLat:1 for: %V16I16 = mul <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I32 = mul i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I32 = mul <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V4I32 = mul <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:8 SizeLat:1 for: %V8I32 = mul <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I64 = mul i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I64 = mul <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:8 SizeLat:1 for: %V4I64 = mul <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'mul'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I8 = mul i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I8 = mul <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V4I8 = mul <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V8I8 = mul <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V16I8 = mul <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V32I8 = mul <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I16 = mul i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I16 = mul <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V4I16 = mul <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V8I16 = mul <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V16I16 = mul <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I32 = mul i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I32 = mul <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V4I32 = mul <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V8I32 = mul <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V1I64 = mul i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V2I64 = mul <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %V4I64 = mul <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = mul i8 poison, poison
+   %V2I8 = mul <2 x i8> poison, poison
+   %V4I8 = mul <4 x i8> poison, poison
+   %V8I8 = mul <8 x i8> poison, poison
+   %V16I8 = mul <16 x i8> poison, poison
+   %V32I8 = mul <32 x i8> poison, poison
+
+   %V1I16 = mul i16 poison, poison
+   %V2I16 = mul <2 x i16> poison, poison
+   %V4I16 = mul <4 x i16> poison, poison
+   %V8I16 = mul <8 x i16> poison, poison
+   %V16I16 = mul <16 x i16> poison, poison
+
+   %V1I32 = mul i32 poison, poison
+   %V2I32 = mul <2 x i32> poison, poison
+   %V4I32 = mul <4 x i32> poison, poison
+   %V8I32 = mul <8 x i32> poison, poison
+
+   %V1I64 = mul i64 poison, poison
+   %V2I64 = mul <2 x i64> poison, poison
+   %V4I64 = mul <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+define i32 @sdiv() {
+; LSX-LABEL: 'sdiv'
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = sdiv i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = sdiv <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = sdiv <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = sdiv <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = sdiv <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:4 Lat:76 SizeLat:4 for: %V32I8 = sdiv <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = sdiv i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = sdiv <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = sdiv <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = sdiv <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:48 SizeLat:4 for: %V16I16 = sdiv <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = sdiv i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = sdiv <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = sdiv <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:4 Lat:34 SizeLat:4 for: %V8I32 = sdiv <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = sdiv i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = sdiv <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:28 SizeLat:4 for: %V4I64 = sdiv <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'sdiv'
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = sdiv i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = sdiv <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = sdiv <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = sdiv <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = sdiv <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V32I8 = sdiv <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = sdiv i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = sdiv <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = sdiv <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = sdiv <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:4 Lat:24 SizeLat:4 for: %V16I16 = sdiv <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = sdiv i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = sdiv <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = sdiv <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V8I32 = sdiv <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = sdiv i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = sdiv <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V4I64 = sdiv <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = sdiv i8 poison, poison
+   %V2I8 = sdiv <2 x i8> poison, poison
+   %V4I8 = sdiv <4 x i8> poison, poison
+   %V8I8 = sdiv <8 x i8> poison, poison
+   %V16I8 = sdiv <16 x i8> poison, poison
+   %V32I8 = sdiv <32 x i8> poison, poison
+
+   %V1I16 = sdiv i16 poison, poison
+   %V2I16 = sdiv <2 x i16> poison, poison
+   %V4I16 = sdiv <4 x i16> poison, poison
+   %V8I16 = sdiv <8 x i16> poison, poison
+   %V16I16 = sdiv <16 x i16> poison, poison
+
+   %V1I32 = sdiv i32 poison, poison
+   %V2I32 = sdiv <2 x i32> poison, poison
+   %V4I32 = sdiv <4 x i32> poison, poison
+   %V8I32 = sdiv <8 x i32> poison, poison
+
+   %V1I64 = sdiv i64 poison, poison
+   %V2I64 = sdiv <2 x i64> poison, poison
+   %V4I64 = sdiv <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+define i32 @udiv() {
+; LSX-LABEL: 'udiv'
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = udiv i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = udiv <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = udiv <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = udiv <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = udiv <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:4 Lat:76 SizeLat:4 for: %V32I8 = udiv <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = udiv i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = udiv <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = udiv <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = udiv <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:48 SizeLat:4 for: %V16I16 = udiv <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = udiv i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = udiv <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = udiv <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:4 Lat:34 SizeLat:4 for: %V8I32 = udiv <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = udiv i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = udiv <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:28 SizeLat:4 for: %V4I64 = udiv <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'udiv'
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = udiv i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = udiv <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = udiv <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = udiv <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = udiv <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V32I8 = udiv <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = udiv i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = udiv <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = udiv <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = udiv <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:4 Lat:24 SizeLat:4 for: %V16I16 = udiv <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = udiv i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = udiv <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = udiv <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V8I32 = udiv <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = udiv i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = udiv <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V4I64 = udiv <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = udiv i8 poison, poison
+   %V2I8 = udiv <2 x i8> poison, poison
+   %V4I8 = udiv <4 x i8> poison, poison
+   %V8I8 = udiv <8 x i8> poison, poison
+   %V16I8 = udiv <16 x i8> poison, poison
+   %V32I8 = udiv <32 x i8> poison, poison
+
+   %V1I16 = udiv i16 poison, poison
+   %V2I16 = udiv <2 x i16> poison, poison
+   %V4I16 = udiv <4 x i16> poison, poison
+   %V8I16 = udiv <8 x i16> poison, poison
+   %V16I16 = udiv <16 x i16> poison, poison
+
+   %V1I32 = udiv i32 poison, poison
+   %V2I32 = udiv <2 x i32> poison, poison
+   %V4I32 = udiv <4 x i32> poison, poison
+   %V8I32 = udiv <8 x i32> poison, poison
+
+   %V1I64 = udiv i64 poison, poison
+   %V2I64 = udiv <2 x i64> poison, poison
+   %V4I64 = udiv <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+define i32 @srem() {
+; LSX-LABEL: 'srem'
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = srem i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = srem <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = srem <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = srem <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = srem <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:4 Lat:76 SizeLat:4 for: %V32I8 = srem <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = srem i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = srem <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = srem <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = srem <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:48 SizeLat:4 for: %V16I16 = srem <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = srem i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = srem <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = srem <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:34 SizeLat:4 for: %V8I32 = srem <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = srem i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = srem <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:28 SizeLat:4 for: %V4I64 = srem <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'srem'
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = srem i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = srem <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = srem <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = srem <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = srem <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V32I8 = srem <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = srem i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = srem <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = srem <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = srem <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V16I16 = srem <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = srem i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = srem <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = srem <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V8I32 = srem <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = srem i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = srem <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V4I64 = srem <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = srem i8 poison, poison
+   %V2I8 = srem <2 x i8> poison, poison
+   %V4I8 = srem <4 x i8> poison, poison
+   %V8I8 = srem <8 x i8> poison, poison
+   %V16I8 = srem <16 x i8> poison, poison
+   %V32I8 = srem <32 x i8> poison, poison
+
+   %V1I16 = srem i16 poison, poison
+   %V2I16 = srem <2 x i16> poison, poison
+   %V4I16 = srem <4 x i16> poison, poison
+   %V8I16 = srem <8 x i16> poison, poison
+   %V16I16 = srem <16 x i16> poison, poison
+
+   %V1I32 = srem i32 poison, poison
+   %V2I32 = srem <2 x i32> poison, poison
+   %V4I32 = srem <4 x i32> poison, poison
+   %V8I32 = srem <8 x i32> poison, poison
+
+   %V1I64 = srem i64 poison, poison
+   %V2I64 = srem <2 x i64> poison, poison
+   %V4I64 = srem <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+define i32 @urem() {
+; LSX-LABEL: 'urem'
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = urem i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = urem <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = urem <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = urem <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = urem <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:4 Lat:76 SizeLat:4 for: %V32I8 = urem <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = urem i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = urem <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = urem <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = urem <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:48 SizeLat:4 for: %V16I16 = urem <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = urem i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = urem <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = urem <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:4 Lat:34 SizeLat:4 for: %V8I32 = urem <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = urem i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = urem <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:28 SizeLat:4 for: %V4I64 = urem <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'urem'
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I8 = urem i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V2I8 = urem <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V4I8 = urem <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V8I8 = urem <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:4 Lat:38 SizeLat:4 for: %V16I8 = urem <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:38 SizeLat:4 for: %V32I8 = urem <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I16 = urem i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V2I16 = urem <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V4I16 = urem <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:24 SizeLat:4 for: %V8I16 = urem <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:4 Lat:24 SizeLat:4 for: %V16I16 = urem <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I32 = urem i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V2I32 = urem <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V4I32 = urem <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:17 SizeLat:4 for: %V8I32 = urem <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:4 Lat:18 SizeLat:4 for: %V1I64 = urem i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V2I64 = urem <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:14 SizeLat:4 for: %V4I64 = urem <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = urem i8 poison, poison
+   %V2I8 = urem <2 x i8> poison, poison
+   %V4I8 = urem <4 x i8> poison, poison
+   %V8I8 = urem <8 x i8> poison, poison
+   %V16I8 = urem <16 x i8> poison, poison
+   %V32I8 = urem <32 x i8> poison, poison
+
+   %V1I16 = urem i16 poison, poison
+   %V2I16 = urem <2 x i16> poison, poison
+   %V4I16 = urem <4 x i16> poison, poison
+   %V8I16 = urem <8 x i16> poison, poison
+   %V16I16 = urem <16 x i16> poison, poison
+
+   %V1I32 = urem i32 poison, poison
+   %V2I32 = urem <2 x i32> poison, poison
+   %V4I32 = urem <4 x i32> poison, poison
+   %V8I32 = urem <8 x i32> poison, poison
+
+   %V1I64 = urem i64 poison, poison
+   %V2I64 = urem <2 x i64> poison, poison
+   %V4I64 = urem <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+define i32 @and() {
+; LSX-LABEL: 'and'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = and i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = and <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = and <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = and <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = and <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = and <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = and i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = and <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = and <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = and <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = and <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = and i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = and <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = and <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = and <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = and i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = and <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = and <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'and'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = and i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = and <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = and <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = and <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = and <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = and <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = and i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = and <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = and <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = and <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = and <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = and i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = and <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = and <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = and <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = and i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = and <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = and <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = and i8 poison, poison
+   %V2I8 = and <2 x i8> poison, poison
+   %V4I8 = and <4 x i8> poison, poison
+   %V8I8 = and <8 x i8> poison, poison
+   %V16I8 = and <16 x i8> poison, poison
+   %V32I8 = and <32 x i8> poison, poison
+
+   %V1I16 = and i16 poison, poison
+   %V2I16 = and <2 x i16> poison, poison
+   %V4I16 = and <4 x i16> poison, poison
+   %V8I16 = and <8 x i16> poison, poison
+   %V16I16 = and <16 x i16> poison, poison
+
+   %V1I32 = and i32 poison, poison
+   %V2I32 = and <2 x i32> poison, poison
+   %V4I32 = and <4 x i32> poison, poison
+   %V8I32 = and <8 x i32> poison, poison
+
+   %V1I64 = and i64 poison, poison
+   %V2I64 = and <2 x i64> poison, poison
+   %V4I64 = and <4 x i64> poison, poison
+
+   ret i32 poison
+}
+; Cost checks for 'or' on i8/i16/i32/i64 scalars and vectors: LSX expects 1 except the 256-bit vectors (RThru:2 Lat:2); LASX expects 1 for every type.
+define i32 @or() {
+; LSX-LABEL: 'or'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = or i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = or <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = or <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = or <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = or <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = or <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = or i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = or <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = or <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = or <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = or <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = or i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = or <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = or <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = or <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = or i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = or <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = or <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'or'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = or i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = or <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = or <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = or <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = or <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = or <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = or i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = or <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = or <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = or <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = or <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = or i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = or <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = or <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = or <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = or i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = or <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = or <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = or i8 poison, poison
+   %V2I8 = or <2 x i8> poison, poison
+   %V4I8 = or <4 x i8> poison, poison
+   %V8I8 = or <8 x i8> poison, poison
+   %V16I8 = or <16 x i8> poison, poison
+   %V32I8 = or <32 x i8> poison, poison
+
+   %V1I16 = or i16 poison, poison
+   %V2I16 = or <2 x i16> poison, poison
+   %V4I16 = or <4 x i16> poison, poison
+   %V8I16 = or <8 x i16> poison, poison
+   %V16I16 = or <16 x i16> poison, poison
+
+   %V1I32 = or i32 poison, poison
+   %V2I32 = or <2 x i32> poison, poison
+   %V4I32 = or <4 x i32> poison, poison
+   %V8I32 = or <8 x i32> poison, poison
+
+   %V1I64 = or i64 poison, poison
+   %V2I64 = or <2 x i64> poison, poison
+   %V4I64 = or <4 x i64> poison, poison
+
+   ret i32 poison
+}
+; Cost checks for 'xor' on i8/i16/i32/i64 scalars and vectors: LSX expects 1 except the 256-bit vectors (RThru:2 Lat:2); LASX expects 1 for every type.
+define i32 @xor() {
+; LSX-LABEL: 'xor'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = xor i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = xor <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = xor <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = xor <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = xor <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = xor <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = xor i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = xor <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = xor <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = xor <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = xor <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = xor i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = xor <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = xor <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = xor <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = xor i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = xor <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = xor <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'xor'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = xor i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = xor <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = xor <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = xor <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = xor <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = xor <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = xor i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = xor <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = xor <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = xor <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = xor <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = xor i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = xor <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = xor <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = xor <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = xor i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = xor <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = xor <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = xor i8 poison, poison
+   %V2I8 = xor <2 x i8> poison, poison
+   %V4I8 = xor <4 x i8> poison, poison
+   %V8I8 = xor <8 x i8> poison, poison
+   %V16I8 = xor <16 x i8> poison, poison
+   %V32I8 = xor <32 x i8> poison, poison
+
+   %V1I16 = xor i16 poison, poison
+   %V2I16 = xor <2 x i16> poison, poison
+   %V4I16 = xor <4 x i16> poison, poison
+   %V8I16 = xor <8 x i16> poison, poison
+   %V16I16 = xor <16 x i16> poison, poison
+
+   %V1I32 = xor i32 poison, poison
+   %V2I32 = xor <2 x i32> poison, poison
+   %V4I32 = xor <4 x i32> poison, poison
+   %V8I32 = xor <8 x i32> poison, poison
+
+   %V1I64 = xor i64 poison, poison
+   %V2I64 = xor <2 x i64> poison, poison
+   %V4I64 = xor <4 x i64> poison, poison
+
+   ret i32 poison
+}
+; Cost checks for 'shl' on i8/i16/i32/i64 scalars and vectors: LSX expects 1 except the 256-bit vectors (RThru:2 Lat:2); LASX expects 1 for every type.
+define i32 @shl() {
+; LSX-LABEL: 'shl'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = shl i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = shl <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = shl <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = shl <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = shl <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = shl <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = shl i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = shl <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = shl <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = shl <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = shl <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = shl i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = shl <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = shl <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = shl <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = shl i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = shl <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = shl <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'shl'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = shl i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = shl <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = shl <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = shl <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = shl <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = shl <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = shl i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = shl <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = shl <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = shl <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = shl <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = shl i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = shl <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = shl <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = shl <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = shl i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = shl <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = shl <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = shl i8 poison, poison
+   %V2I8 = shl <2 x i8> poison, poison
+   %V4I8 = shl <4 x i8> poison, poison
+   %V8I8 = shl <8 x i8> poison, poison
+   %V16I8 = shl <16 x i8> poison, poison
+   %V32I8 = shl <32 x i8> poison, poison
+
+   %V1I16 = shl i16 poison, poison
+   %V2I16 = shl <2 x i16> poison, poison
+   %V4I16 = shl <4 x i16> poison, poison
+   %V8I16 = shl <8 x i16> poison, poison
+   %V16I16 = shl <16 x i16> poison, poison
+
+   %V1I32 = shl i32 poison, poison
+   %V2I32 = shl <2 x i32> poison, poison
+   %V4I32 = shl <4 x i32> poison, poison
+   %V8I32 = shl <8 x i32> poison, poison
+
+   %V1I64 = shl i64 poison, poison
+   %V2I64 = shl <2 x i64> poison, poison
+   %V4I64 = shl <4 x i64> poison, poison
+
+   ret i32 poison
+}
+; Cost checks for 'lshr' on i8/i16/i32/i64 scalars and vectors: LSX expects 1 except the 256-bit vectors (RThru:2 Lat:2); LASX expects 1 for every type.
+define i32 @lshr() {
+; LSX-LABEL: 'lshr'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = lshr i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = lshr <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = lshr <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = lshr <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = lshr <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = lshr <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = lshr i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = lshr <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = lshr <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = lshr <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = lshr <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = lshr i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = lshr <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = lshr <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = lshr <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = lshr i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = lshr <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = lshr <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'lshr'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = lshr i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = lshr <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = lshr <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = lshr <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = lshr <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = lshr <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = lshr i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = lshr <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = lshr <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = lshr <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = lshr <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = lshr i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = lshr <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = lshr <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = lshr <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = lshr i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = lshr <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = lshr <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = lshr i8 poison, poison
+   %V2I8 = lshr <2 x i8> poison, poison
+   %V4I8 = lshr <4 x i8> poison, poison
+   %V8I8 = lshr <8 x i8> poison, poison
+   %V16I8 = lshr <16 x i8> poison, poison
+   %V32I8 = lshr <32 x i8> poison, poison
+
+   %V1I16 = lshr i16 poison, poison
+   %V2I16 = lshr <2 x i16> poison, poison
+   %V4I16 = lshr <4 x i16> poison, poison
+   %V8I16 = lshr <8 x i16> poison, poison
+   %V16I16 = lshr <16 x i16> poison, poison
+
+   %V1I32 = lshr i32 poison, poison
+   %V2I32 = lshr <2 x i32> poison, poison
+   %V4I32 = lshr <4 x i32> poison, poison
+   %V8I32 = lshr <8 x i32> poison, poison
+
+   %V1I64 = lshr i64 poison, poison
+   %V2I64 = lshr <2 x i64> poison, poison
+   %V4I64 = lshr <4 x i64> poison, poison
+
+   ret i32 poison
+}
+; Cost checks for 'ashr' on i8/i16/i32/i64 scalars and vectors: LSX expects 1 except the 256-bit vectors (RThru:2 Lat:2); LASX expects 1 for every type.
+define i32 @ashr() {
+; LSX-LABEL: 'ashr'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = ashr i8 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = ashr <2 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = ashr <4 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = ashr <8 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = ashr <16 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = ashr <32 x i8> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = ashr i16 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = ashr <2 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = ashr <4 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = ashr <8 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = ashr <16 x i16> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = ashr i32 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = ashr <2 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = ashr <4 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = ashr <8 x i32> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = ashr i64 poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = ashr <2 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = ashr <4 x i64> poison, poison
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'ashr'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = ashr i8 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = ashr <2 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = ashr <4 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = ashr <8 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = ashr <16 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = ashr <32 x i8> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = ashr i16 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = ashr <2 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = ashr <4 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = ashr <8 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = ashr <16 x i16> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = ashr i32 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = ashr <2 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = ashr <4 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = ashr <8 x i32> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = ashr i64 poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = ashr <2 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = ashr <4 x i64> poison, poison
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = ashr i8 poison, poison
+   %V2I8 = ashr <2 x i8> poison, poison
+   %V4I8 = ashr <4 x i8> poison, poison
+   %V8I8 = ashr <8 x i8> poison, poison
+   %V16I8 = ashr <16 x i8> poison, poison
+   %V32I8 = ashr <32 x i8> poison, poison
+
+   %V1I16 = ashr i16 poison, poison
+   %V2I16 = ashr <2 x i16> poison, poison
+   %V4I16 = ashr <4 x i16> poison, poison
+   %V8I16 = ashr <8 x i16> poison, poison
+   %V16I16 = ashr <16 x i16> poison, poison
+
+   %V1I32 = ashr i32 poison, poison
+   %V2I32 = ashr <2 x i32> poison, poison
+   %V4I32 = ashr <4 x i32> poison, poison
+   %V8I32 = ashr <8 x i32> poison, poison
+
+   %V1I64 = ashr i64 poison, poison
+   %V2I64 = ashr <2 x i64> poison, poison
+   %V4I64 = ashr <4 x i64> poison, poison
+
+   ret i32 poison
+}
+
+; Cost checks for 'sdiv' by the constant 2 (scalar 2 or splat 2): LSX expects 4, with RThru:8 Lat:8 for the 256-bit vectors; LASX expects 4 for every type.
+define i32 @sdiv_constant() {
+; LSX-LABEL: 'sdiv_constant'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V1I8 = sdiv i8 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = sdiv <2 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = sdiv <4 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V8I8 = sdiv <8 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = sdiv <16 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:4 for: %V32I8 = sdiv <32 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V1I16 = sdiv i16 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = sdiv <2 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V4I16 = sdiv <4 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = sdiv <8 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:4 for: %V16I16 = sdiv <16 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V1I32 = sdiv i32 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V2I32 = sdiv <2 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = sdiv <4 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:4 for: %V8I32 = sdiv <8 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V1I64 = sdiv i64 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = sdiv <2 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:4 for: %V4I64 = sdiv <4 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'sdiv_constant'
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V1I8 = sdiv i8 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = sdiv <2 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = sdiv <4 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V8I8 = sdiv <8 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = sdiv <16 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V32I8 = sdiv <32 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V1I16 = sdiv i16 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = sdiv <2 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V4I16 = sdiv <4 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = sdiv <8 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V16I16 = sdiv <16 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V1I32 = sdiv i32 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V2I32 = sdiv <2 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = sdiv <4 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V8I32 = sdiv <8 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V1I64 = sdiv i64 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = sdiv <2 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of 4 for: %V4I64 = sdiv <4 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = sdiv i8 poison, 2
+   %V2I8 = sdiv <2 x i8> poison, splat (i8 2)
+   %V4I8 = sdiv <4 x i8> poison, splat (i8 2)
+   %V8I8 = sdiv <8 x i8> poison, splat (i8 2)
+   %V16I8 = sdiv <16 x i8> poison, splat (i8 2)
+   %V32I8 = sdiv <32 x i8> poison, splat (i8 2)
+
+   %V1I16 = sdiv i16 poison, 2
+   %V2I16 = sdiv <2 x i16> poison, splat (i16 2)
+   %V4I16 = sdiv <4 x i16> poison, splat (i16 2)
+   %V8I16 = sdiv <8 x i16> poison, splat (i16 2)
+   %V16I16 = sdiv <16 x i16> poison, splat (i16 2)
+
+   %V1I32 = sdiv i32 poison, 2
+   %V2I32 = sdiv <2 x i32> poison, splat (i32 2)
+   %V4I32 = sdiv <4 x i32> poison, splat (i32 2)
+   %V8I32 = sdiv <8 x i32> poison, splat (i32 2)
+
+   %V1I64 = sdiv i64 poison, 2
+   %V2I64 = sdiv <2 x i64> poison, splat (i64 2)
+   %V4I64 = sdiv <4 x i64> poison, splat (i64 2)
+
+   ret i32 poison
+}
+; Cost checks for 'udiv' by the constant 2 (scalar 2 or splat 2): LSX expects 1, with RThru:2 Lat:2 for the 256-bit vectors; LASX expects 1 for every type.
+define i32 @udiv_constant() {
+; LSX-LABEL: 'udiv_constant'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = udiv i8 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = udiv <2 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = udiv <4 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = udiv <8 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = udiv <16 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = udiv <32 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = udiv i16 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = udiv <2 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = udiv <4 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = udiv <8 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = udiv <16 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = udiv i32 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = udiv <2 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = udiv <4 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = udiv <8 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = udiv i64 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = udiv <2 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = udiv <4 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'udiv_constant'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = udiv i8 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = udiv <2 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = udiv <4 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = udiv <8 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = udiv <16 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = udiv <32 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = udiv i16 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = udiv <2 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = udiv <4 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = udiv <8 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = udiv <16 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = udiv i32 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = udiv <2 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = udiv <4 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = udiv <8 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = udiv i64 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = udiv <2 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = udiv <4 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = udiv i8 poison, 2
+   %V2I8 = udiv <2 x i8> poison, splat (i8 2)
+   %V4I8 = udiv <4 x i8> poison, splat (i8 2)
+   %V8I8 = udiv <8 x i8> poison, splat (i8 2)
+   %V16I8 = udiv <16 x i8> poison, splat (i8 2)
+   %V32I8 = udiv <32 x i8> poison, splat (i8 2)
+
+   %V1I16 = udiv i16 poison, 2
+   %V2I16 = udiv <2 x i16> poison, splat (i16 2)
+   %V4I16 = udiv <4 x i16> poison, splat (i16 2)
+   %V8I16 = udiv <8 x i16> poison, splat (i16 2)
+   %V16I16 = udiv <16 x i16> poison, splat (i16 2)
+
+   %V1I32 = udiv i32 poison, 2
+   %V2I32 = udiv <2 x i32> poison, splat (i32 2)
+   %V4I32 = udiv <4 x i32> poison, splat (i32 2)
+   %V8I32 = udiv <8 x i32> poison, splat (i32 2)
+
+   %V1I64 = udiv i64 poison, 2
+   %V2I64 = udiv <2 x i64> poison, splat (i64 2)
+   %V4I64 = udiv <4 x i64> poison, splat (i64 2)
+
+   ret i32 poison
+}
+
+define i32 @srem_constant() {
+; LSX-LABEL: 'srem_constant'
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I8 = srem i8 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I8 = srem <2 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V4I8 = srem <4 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V8I8 = srem <8 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V16I8 = srem <16 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:6 Lat:18 SizeLat:6 for: %V32I8 = srem <32 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I16 = srem i16 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I16 = srem <2 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V4I16 = srem <4 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V8I16 = srem <8 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:6 Lat:18 SizeLat:6 for: %V16I16 = srem <16 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I32 = srem i32 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I32 = srem <2 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V4I32 = srem <4 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:6 Lat:18 SizeLat:6 for: %V8I32 = srem <8 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I64 = srem i64 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I64 = srem <2 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:6 Lat:18 SizeLat:6 for: %V4I64 = srem <4 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'srem_constant'
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I8 = srem i8 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I8 = srem <2 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V4I8 = srem <4 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V8I8 = srem <8 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V16I8 = srem <16 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V32I8 = srem <32 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I16 = srem i16 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I16 = srem <2 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V4I16 = srem <4 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V8I16 = srem <8 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V16I16 = srem <16 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I32 = srem i32 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I32 = srem <2 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V4I32 = srem <4 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V8I32 = srem <8 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V1I64 = srem i64 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V2I64 = srem <2 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:9 SizeLat:6 for: %V4I64 = srem <4 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = srem i8 poison, 2
+   %V2I8 = srem <2 x i8> poison, splat (i8 2)
+   %V4I8 = srem <4 x i8> poison, splat (i8 2)
+   %V8I8 = srem <8 x i8> poison, splat (i8 2)
+   %V16I8 = srem <16 x i8> poison, splat (i8 2)
+   %V32I8 = srem <32 x i8> poison, splat (i8 2)
+
+   %V1I16 = srem i16 poison, 2
+   %V2I16 = srem <2 x i16> poison, splat (i16 2)
+   %V4I16 = srem <4 x i16> poison, splat (i16 2)
+   %V8I16 = srem <8 x i16> poison, splat (i16 2)
+   %V16I16 = srem <16 x i16> poison, splat (i16 2)
+
+   %V1I32 = srem i32 poison, 2
+   %V2I32 = srem <2 x i32> poison, splat (i32 2)
+   %V4I32 = srem <4 x i32> poison, splat (i32 2)
+   %V8I32 = srem <8 x i32> poison, splat (i32 2)
+
+   %V1I64 = srem i64 poison, 2
+   %V2I64 = srem <2 x i64> poison, splat (i64 2)
+   %V4I64 = srem <4 x i64> poison, splat (i64 2)
+
+   ret i32 poison
+}
+
+define i32 @urem_constant() {
+; LSX-LABEL: 'urem_constant'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = urem i8 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = urem <2 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = urem <4 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = urem <8 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = urem <16 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V32I8 = urem <32 x i8> poison, splat (i8 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = urem i16 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = urem <2 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = urem <4 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = urem <8 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V16I16 = urem <16 x i16> poison, splat (i16 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = urem i32 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = urem <2 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = urem <4 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V8I32 = urem <8 x i32> poison, splat (i32 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = urem i64 poison, 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = urem <2 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = urem <4 x i64> poison, splat (i64 2)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'urem_constant'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I8 = urem i8 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I8 = urem <2 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I8 = urem <4 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = urem <8 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = urem <16 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = urem <32 x i8> poison, splat (i8 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I16 = urem i16 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I16 = urem <2 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = urem <4 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = urem <8 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = urem <16 x i16> poison, splat (i16 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I32 = urem i32 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = urem <2 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = urem <4 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = urem <8 x i32> poison, splat (i32 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V1I64 = urem i64 poison, 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = urem <2 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = urem <4 x i64> poison, splat (i64 2)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+   %V1I8 = urem i8 poison, 2
+   %V2I8 = urem <2 x i8> poison, splat (i8 2)
+   %V4I8 = urem <4 x i8> poison, splat (i8 2)
+   %V8I8 = urem <8 x i8> poison, splat (i8 2)
+   %V16I8 = urem <16 x i8> poison, splat (i8 2)
+   %V32I8 = urem <32 x i8> poison, splat (i8 2)
+
+   %V1I16 = urem i16 poison, 2
+   %V2I16 = urem <2 x i16> poison, splat (i16 2)
+   %V4I16 = urem <4 x i16> poison, splat (i16 2)
+   %V8I16 = urem <8 x i16> poison, splat (i16 2)
+   %V16I16 = urem <16 x i16> poison, splat (i16 2)
+
+   %V1I32 = urem i32 poison, 2
+   %V2I32 = urem <2 x i32> poison, splat (i32 2)
+   %V4I32 = urem <4 x i32> poison, splat (i32 2)
+   %V8I32 = urem <8 x i32> poison, splat (i32 2)
+
+   %V1I64 = urem i64 poison, 2
+   %V2I64 = urem <2 x i64> poison, splat (i64 2)
+   %V4I64 = urem <4 x i64> poison, splat (i64 2)
+
+   ret i32 poison
+}

>From 4a534d2f30f680dd89d3321cb2d14240a0112a67 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 28 Oct 2025 17:20:44 +0800
Subject: [PATCH 2/8] Support getVectorInstrCost

---
 .../LoongArchTargetTransformInfo.cpp          |  74 +++-
 .../LoongArch/LoongArchTargetTransformInfo.h  |   6 +
 .../CostModel/LoongArch/vector-extract.ll     | 378 ++++++++++++++++++
 .../LoongArch/vector-insert-value.ll          | 361 +++++++++++++++++
 .../CostModel/LoongArch/vector-insert.ll      | 363 +++++++++++++++++
 5 files changed, 1181 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/vector-extract.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/vector-insert-value.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/vector-insert.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 484b56cd73236..1f5eb4792d9d6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -459,4 +459,76 @@ InstructionCost LoongArchTTIImpl::getArithmeticInstrCost(
   // Fallback to the default implementation.
   return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                        Args, CxtI);
-}
\ No newline at end of file
+}
+
+InstructionCost LoongArchTTIImpl::getVectorInstrCost(
+    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+    const Value *Op0, const Value *Op1) const {
+  // Target costs for vector element extract/insert; the rest defers to BaseT.
+  assert(Val->isVectorTy() && "This must be a vector type");
+
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  InstructionCost RegisterFileMoveCost = 0;
+  // Each entry lists {Latency, RecipThroughput}; other cost kinds stay unset.
+  static const CostKindTblEntry CostTable[]{
+      {ISD::EXTRACT_VECTOR_ELT, MVT::i8, {3, 4}},  // vpickve2gr.b
+      {ISD::EXTRACT_VECTOR_ELT, MVT::i16, {3, 4}}, // vpickve2gr.h
+      {ISD::EXTRACT_VECTOR_ELT, MVT::i32, {3, 4}}, // vpickve2gr.w
+      {ISD::EXTRACT_VECTOR_ELT, MVT::i64, {3, 4}}, // vpickve2gr.d
+
+      {ISD::EXTRACT_VECTOR_ELT, MVT::f32, {1, 1}}, // vreplvei.w
+      {ISD::EXTRACT_VECTOR_ELT, MVT::f64, {1, 1}}, // vreplvei.d
+  };
+  // Index == -1U (presumably "index unknown") bypasses the target costs.
+  if (Index != -1U &&
+      (ISD == ISD::EXTRACT_VECTOR_ELT || ISD == ISD::INSERT_VECTOR_ELT)) {
+    // The type legalizes to a non-vector MVT: report the operation as free.
+    if (!LT.second.isVector())
+      return TTI::TCC_Free;
+    // Wrap Index into one legalized register (wider types presumably split).
+    unsigned SizeInBits = LT.second.getSizeInBits();
+    unsigned NumElts = LT.second.getVectorNumElements();
+    Index = Index % NumElts;
+    // Upper-half integer lanes of a >128-bit vector: +2 insert, +1 extract.
+    if (SizeInBits > 128 && Index >= NumElts / 2 && !Val->isFPOrFPVectorTy()) {
+      RegisterFileMoveCost += (ISD == ISD::INSERT_VECTOR_ELT ? 2 : 1);
+    }
+
+    if (ISD == ISD::INSERT_VECTOR_ELT) {
+      // Constant into a poison vector: one vldi/vrepli.
+      if (isa_and_nonnull<PoisonValue>(Op0) && isa_and_nonnull<Constant>(Op1)) {
+        return 1 + RegisterFileMoveCost;
+      }
+
+      // Any other FP constant operand: vldi + vextrins.
+      if (isa_and_nonnull<ConstantFP>(Op1)) {
+        return 2 + RegisterFileMoveCost;
+      }
+
+      // Remaining float/double operands: vextrins.
+      if (Op1 &&
+          (Op1->getType()->isFloatTy() || Op1->getType()->isDoubleTy())) {
+        return 1 + RegisterFileMoveCost;
+      }
+
+      // Otherwise vinsgr2vr; only RecipThroughput and Latency are costed.
+      if (CostKind == TTI::TCK_RecipThroughput) {
+        return 4 + RegisterFileMoveCost;
+      }
+      if (CostKind == TTI::TCK_Latency) {
+        return 3 + RegisterFileMoveCost;
+      }
+    }
+    // Use the table cost if this cost kind has one; else fall back to BaseT.
+    if (auto *Entry =
+            CostTableLookup(CostTable, ISD, LT.second.getScalarType()))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return *KindCost + RegisterFileMoveCost;
+  }
+
+  return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1) +
+         RegisterFileMoveCost;
+}
+
+// TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 31445bad5a799..8d0c05ddd8b6a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -61,6 +61,12 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
       ArrayRef<const Value *> Args = {},
       const Instruction *CxtI = nullptr) const override;
 
+  using BaseT::getVectorInstrCost; // Keep any other BaseT overloads visible.
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+                                     TTI::TargetCostKind CostKind,
+                                     unsigned Index, const Value *Op0,
+                                     const Value *Op1) const override;
+
   TTI::MemCmpExpansionOptions
   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
 };
diff --git a/llvm/test/Analysis/CostModel/LoongArch/vector-extract.ll b/llvm/test/Analysis/CostModel/LoongArch/vector-extract.ll
new file mode 100644
index 0000000000000..6a98a30aa4053
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/vector-extract.ll
@@ -0,0 +1,378 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define i32 @extract_double(i32 %arg) {
+; LSX-LABEL: 'extract_double'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_a = extractelement <2 x double> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_0 = extractelement <2 x double> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_1 = extractelement <2 x double> poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_a = extractelement <4 x double> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_0 = extractelement <4 x double> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_3 = extractelement <4 x double> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'extract_double'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_a = extractelement <2 x double> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_0 = extractelement <2 x double> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_1 = extractelement <2 x double> poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_a = extractelement <4 x double> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_0 = extractelement <4 x double> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_3 = extractelement <4 x double> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2f64_a = extractelement <2 x double> poison, i32 %arg
+  %v2f64_0 = extractelement <2 x double> poison, i32 0
+  %v2f64_1 = extractelement <2 x double> poison, i32 1
+
+  %v4f64_a = extractelement <4 x double> poison, i32 %arg
+  %v4f64_0 = extractelement <4 x double> poison, i32 0
+  %v4f64_3 = extractelement <4 x double> poison, i32 3
+
+  ret i32 poison
+}
+
+define i32 @extract_float(i32 %arg) {
+; LSX-LABEL: 'extract_float'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_a = extractelement <2 x float> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_0 = extractelement <2 x float> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_1 = extractelement <2 x float> poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_a = extractelement <4 x float> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_0 = extractelement <4 x float> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_3 = extractelement <4 x float> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_a = extractelement <8 x float> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_0 = extractelement <8 x float> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_3 = extractelement <8 x float> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_4 = extractelement <8 x float> poison, i32 4
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_7 = extractelement <8 x float> poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'extract_float'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_a = extractelement <2 x float> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_0 = extractelement <2 x float> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_1 = extractelement <2 x float> poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_a = extractelement <4 x float> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_0 = extractelement <4 x float> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_3 = extractelement <4 x float> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_a = extractelement <8 x float> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_0 = extractelement <8 x float> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_3 = extractelement <8 x float> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_4 = extractelement <8 x float> poison, i32 4
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_7 = extractelement <8 x float> poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2f32_a = extractelement <2 x float> poison, i32 %arg
+  %v2f32_0 = extractelement <2 x float> poison, i32 0
+  %v2f32_1 = extractelement <2 x float> poison, i32 1
+
+  %v4f32_a = extractelement <4 x float> poison, i32 %arg
+  %v4f32_0 = extractelement <4 x float> poison, i32 0
+  %v4f32_3 = extractelement <4 x float> poison, i32 3
+
+  %v8f32_a = extractelement <8 x float> poison, i32 %arg
+  %v8f32_0 = extractelement <8 x float> poison, i32 0
+  %v8f32_3 = extractelement <8 x float> poison, i32 3
+  %v8f32_4 = extractelement <8 x float> poison, i32 4
+  %v8f32_7 = extractelement <8 x float> poison, i32 7
+
+  ret i32 poison
+}
+
+define i32 @extract_i64(i32 %arg) {
+; LSX-LABEL: 'extract_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_a = extractelement <2 x i64> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_0 = extractelement <2 x i64> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_1 = extractelement <2 x i64> poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_a = extractelement <4 x i64> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i64_0 = extractelement <4 x i64> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i64_3 = extractelement <4 x i64> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'extract_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_a = extractelement <2 x i64> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_0 = extractelement <2 x i64> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_1 = extractelement <2 x i64> poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_a = extractelement <4 x i64> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i64_0 = extractelement <4 x i64> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v4i64_3 = extractelement <4 x i64> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i64_a = extractelement <2 x i64> poison, i32 %arg
+  %v2i64_0 = extractelement <2 x i64> poison, i32 0
+  %v2i64_1 = extractelement <2 x i64> poison, i32 1
+
+  %v4i64_a = extractelement <4 x i64> poison, i32 %arg
+  %v4i64_0 = extractelement <4 x i64> poison, i32 0
+  %v4i64_3 = extractelement <4 x i64> poison, i32 3
+
+  ret i32 poison
+}
+
+define i32 @extract_i32(i32 %arg) {
+; LSX-LABEL: 'extract_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_a = extractelement <2 x i32> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_0 = extractelement <2 x i32> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_1 = extractelement <2 x i32> poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_a = extractelement <4 x i32> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_0 = extractelement <4 x i32> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_3 = extractelement <4 x i32> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_a = extractelement <8 x i32> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_0 = extractelement <8 x i32> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_3 = extractelement <8 x i32> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_4 = extractelement <8 x i32> poison, i32 4
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_7 = extractelement <8 x i32> poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i32_a = extractelement <16 x i32> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i32_0 = extractelement <16 x i32> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i32_3 = extractelement <16 x i32> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i32_8 = extractelement <16 x i32> poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i32_15 = extractelement <16 x i32> poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'extract_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_a = extractelement <2 x i32> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_0 = extractelement <2 x i32> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_1 = extractelement <2 x i32> poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_a = extractelement <4 x i32> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_0 = extractelement <4 x i32> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_3 = extractelement <4 x i32> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_a = extractelement <8 x i32> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_0 = extractelement <8 x i32> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_3 = extractelement <8 x i32> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v8i32_4 = extractelement <8 x i32> poison, i32 4
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v8i32_7 = extractelement <8 x i32> poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i32_a = extractelement <16 x i32> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i32_0 = extractelement <16 x i32> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i32_3 = extractelement <16 x i32> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i32_8 = extractelement <16 x i32> poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v16i32_15 = extractelement <16 x i32> poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i32_a = extractelement <2 x i32> poison, i32 %arg
+  %v2i32_0 = extractelement <2 x i32> poison, i32 0
+  %v2i32_1 = extractelement <2 x i32> poison, i32 1
+
+  %v4i32_a = extractelement <4 x i32> poison, i32 %arg
+  %v4i32_0 = extractelement <4 x i32> poison, i32 0
+  %v4i32_3 = extractelement <4 x i32> poison, i32 3
+
+  %v8i32_a = extractelement <8 x i32> poison, i32 %arg
+  %v8i32_0 = extractelement <8 x i32> poison, i32 0
+  %v8i32_3 = extractelement <8 x i32> poison, i32 3
+  %v8i32_4 = extractelement <8 x i32> poison, i32 4
+  %v8i32_7 = extractelement <8 x i32> poison, i32 7
+
+  %v16i32_a = extractelement <16 x i32> poison, i32 %arg
+  %v16i32_0 = extractelement <16 x i32> poison, i32 0
+  %v16i32_3 = extractelement <16 x i32> poison, i32 3
+  %v16i32_8 = extractelement <16 x i32> poison, i32 8
+  %v16i32_15 = extractelement <16 x i32> poison, i32 15
+
+  ret i32 poison
+}
+
+define i32 @extract_i16(i32 %arg) {
+; LSX-LABEL: 'extract_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_a = extractelement <2 x i16> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_0 = extractelement <2 x i16> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_1 = extractelement <2 x i16> poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_a = extractelement <4 x i16> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_0 = extractelement <4 x i16> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_3 = extractelement <4 x i16> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_a = extractelement <8 x i16> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_0 = extractelement <8 x i16> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_7 = extractelement <8 x i16> poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_a = extractelement <16 x i16> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_0 = extractelement <16 x i16> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_7 = extractelement <16 x i16> poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_8 = extractelement <16 x i16> poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_15 = extractelement <16 x i16> poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'extract_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_a = extractelement <2 x i16> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_0 = extractelement <2 x i16> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_1 = extractelement <2 x i16> poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_a = extractelement <4 x i16> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_0 = extractelement <4 x i16> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_3 = extractelement <4 x i16> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_a = extractelement <8 x i16> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_0 = extractelement <8 x i16> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_7 = extractelement <8 x i16> poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_a = extractelement <16 x i16> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_0 = extractelement <16 x i16> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_7 = extractelement <16 x i16> poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v16i16_8 = extractelement <16 x i16> poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v16i16_15 = extractelement <16 x i16> poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i16_a = extractelement <2 x i16> poison, i32 %arg
+  %v2i16_0 = extractelement <2 x i16> poison, i32 0
+  %v2i16_1 = extractelement <2 x i16> poison, i32 1
+
+  %v4i16_a = extractelement <4 x i16> poison, i32 %arg
+  %v4i16_0 = extractelement <4 x i16> poison, i32 0
+  %v4i16_3 = extractelement <4 x i16> poison, i32 3
+
+  %v8i16_a = extractelement <8 x i16> poison, i32 %arg
+  %v8i16_0 = extractelement <8 x i16> poison, i32 0
+  %v8i16_7 = extractelement <8 x i16> poison, i32 7
+
+  %v16i16_a = extractelement <16 x i16> poison, i32 %arg
+  %v16i16_0 = extractelement <16 x i16> poison, i32 0
+  %v16i16_7 = extractelement <16 x i16> poison, i32 7
+  %v16i16_8 = extractelement <16 x i16> poison, i32 8
+  %v16i16_15 = extractelement <16 x i16> poison, i32 15
+
+  ret i32 poison
+}
+
+; Cost-model expectations for extractelement on <2 x i8> through <32 x i8>,
+; using both a variable index (%arg) and constant indices. The LSX and LASX
+; expectations differ only for <32 x i8> lanes 24 and 31, which LASX reports
+; as more expensive than the other constant lanes.
+define i32 @extract_i8(i32 %arg) {
+; LSX-LABEL: 'extract_i8'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_a = extractelement <2 x i8> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_0 = extractelement <2 x i8> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_1 = extractelement <2 x i8> poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_a = extractelement <4 x i8> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_0 = extractelement <4 x i8> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_3 = extractelement <4 x i8> poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_a = extractelement <8 x i8> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_0 = extractelement <8 x i8> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_7 = extractelement <8 x i8> poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_a = extractelement <16 x i8> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_0 = extractelement <16 x i8> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_8 = extractelement <16 x i8> poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_15 = extractelement <16 x i8> poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_a = extractelement <32 x i8> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_0 = extractelement <32 x i8> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_7 = extractelement <32 x i8> poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_8 = extractelement <32 x i8> poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_15 = extractelement <32 x i8> poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_24 = extractelement <32 x i8> poison, i32 24
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_31 = extractelement <32 x i8> poison, i32 31
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'extract_i8'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_a = extractelement <2 x i8> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_0 = extractelement <2 x i8> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_1 = extractelement <2 x i8> poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_a = extractelement <4 x i8> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_0 = extractelement <4 x i8> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_3 = extractelement <4 x i8> poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_a = extractelement <8 x i8> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_0 = extractelement <8 x i8> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_7 = extractelement <8 x i8> poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_a = extractelement <16 x i8> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_0 = extractelement <16 x i8> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_8 = extractelement <16 x i8> poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_15 = extractelement <16 x i8> poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_a = extractelement <32 x i8> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_0 = extractelement <32 x i8> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_7 = extractelement <32 x i8> poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_8 = extractelement <32 x i8> poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_15 = extractelement <32 x i8> poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v32i8_24 = extractelement <32 x i8> poison, i32 24
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v32i8_31 = extractelement <32 x i8> poison, i32 31
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i8_a = extractelement <2 x i8> poison, i32 %arg
+  %v2i8_0 = extractelement <2 x i8> poison, i32 0
+  %v2i8_1 = extractelement <2 x i8> poison, i32 1
+
+  %v4i8_a = extractelement <4 x i8> poison, i32 %arg
+  %v4i8_0 = extractelement <4 x i8> poison, i32 0
+  %v4i8_3 = extractelement <4 x i8> poison, i32 3
+
+  %v8i8_a = extractelement <8 x i8> poison, i32 %arg
+  %v8i8_0 = extractelement <8 x i8> poison, i32 0
+  %v8i8_7 = extractelement <8 x i8> poison, i32 7
+
+  %v16i8_a = extractelement <16 x i8> poison, i32 %arg
+  %v16i8_0 = extractelement <16 x i8> poison, i32 0
+  %v16i8_8 = extractelement <16 x i8> poison, i32 8
+  %v16i8_15 = extractelement <16 x i8> poison, i32 15
+
+  %v32i8_a = extractelement <32 x i8> poison, i32 %arg
+  %v32i8_0 = extractelement <32 x i8> poison, i32 0
+  %v32i8_7 = extractelement <32 x i8> poison, i32 7
+  %v32i8_8 = extractelement <32 x i8> poison, i32 8
+  %v32i8_15 = extractelement <32 x i8> poison, i32 15
+  %v32i8_24 = extractelement <32 x i8> poison, i32 24
+  %v32i8_31 = extractelement <32 x i8> poison, i32 31
+
+  ret i32 poison
+}
+
+; Cost-model expectations for extractelement on <2 x i1> through <32 x i1>,
+; using both a variable index (%arg) and constant indices. The LSX and LASX
+; expectations differ only for <32 x i1> lanes 24 and 31, which LASX reports
+; as more expensive than the other constant lanes.
+define i32 @extract_i1(i32 %arg) {
+; LSX-LABEL: 'extract_i1'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_a = extractelement <2 x i1> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_0 = extractelement <2 x i1> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_1 = extractelement <2 x i1> poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_a = extractelement <4 x i1> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_0 = extractelement <4 x i1> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_2 = extractelement <4 x i1> poison, i32 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_a = extractelement <8 x i1> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_0 = extractelement <8 x i1> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_4 = extractelement <8 x i1> poison, i32 4
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_a = extractelement <16 x i1> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_0 = extractelement <16 x i1> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_8 = extractelement <16 x i1> poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_15 = extractelement <16 x i1> poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_a = extractelement <32 x i1> poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_0 = extractelement <32 x i1> poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_7 = extractelement <32 x i1> poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_8 = extractelement <32 x i1> poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_15 = extractelement <32 x i1> poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_24 = extractelement <32 x i1> poison, i32 24
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_31 = extractelement <32 x i1> poison, i32 31
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'extract_i1'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_a = extractelement <2 x i1> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_0 = extractelement <2 x i1> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_1 = extractelement <2 x i1> poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_a = extractelement <4 x i1> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_0 = extractelement <4 x i1> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_2 = extractelement <4 x i1> poison, i32 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_a = extractelement <8 x i1> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_0 = extractelement <8 x i1> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_4 = extractelement <8 x i1> poison, i32 4
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_a = extractelement <16 x i1> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_0 = extractelement <16 x i1> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_8 = extractelement <16 x i1> poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_15 = extractelement <16 x i1> poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_a = extractelement <32 x i1> poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_0 = extractelement <32 x i1> poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_7 = extractelement <32 x i1> poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_8 = extractelement <32 x i1> poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_15 = extractelement <32 x i1> poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v32i1_24 = extractelement <32 x i1> poison, i32 24
+; LASX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:4 SizeLat:2 for: %v32i1_31 = extractelement <32 x i1> poison, i32 31
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i1_a = extractelement <2 x i1> poison, i32 %arg
+  %v2i1_0 = extractelement <2 x i1> poison, i32 0
+  %v2i1_1 = extractelement <2 x i1> poison, i32 1
+
+  %v4i1_a = extractelement <4 x i1> poison, i32 %arg
+  %v4i1_0 = extractelement <4 x i1> poison, i32 0
+  %v4i1_2 = extractelement <4 x i1> poison, i32 2
+
+  %v8i1_a = extractelement <8 x i1> poison, i32 %arg
+  %v8i1_0 = extractelement <8 x i1> poison, i32 0
+  %v8i1_4 = extractelement <8 x i1> poison, i32 4
+
+  %v16i1_a = extractelement <16 x i1> poison, i32 %arg
+  %v16i1_0 = extractelement <16 x i1> poison, i32 0
+  %v16i1_8 = extractelement <16 x i1> poison, i32 8
+  %v16i1_15 = extractelement <16 x i1> poison, i32 15
+
+  %v32i1_a = extractelement <32 x i1> poison, i32 %arg
+  %v32i1_0 = extractelement <32 x i1> poison, i32 0
+  %v32i1_7 = extractelement <32 x i1> poison, i32 7
+  %v32i1_8 = extractelement <32 x i1> poison, i32 8
+  %v32i1_15 = extractelement <32 x i1> poison, i32 15
+  %v32i1_24 = extractelement <32 x i1> poison, i32 24
+  %v32i1_31 = extractelement <32 x i1> poison, i32 31
+
+  ret i32 poison
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/vector-insert-value.ll b/llvm/test/Analysis/CostModel/LoongArch/vector-insert-value.ll
new file mode 100644
index 0000000000000..574ae68278335
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/vector-insert-value.ll
@@ -0,0 +1,361 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+; Cost-model expectations for insertelement of a double into <2 x double> and
+; <4 x double>, using both a variable index (%arg) and constant indices. Every
+; insert is expected to cost 1 under both LSX and LASX. %src512 is not used by
+; the body. The return value is irrelevant to the test, so poison is returned.
+define i32 @insert_double(i32 %arg, double %val, <2 x double> %src128, <4 x double> %src256, <8 x double> %src512) {
+; LSX-LABEL: 'insert_double'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_a = insertelement <2 x double> %src128, double %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_1 = insertelement <2 x double> %src128, double %val, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_a = insertelement <4 x double> %src256, double %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_3 = insertelement <4 x double> %src256, double %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_double'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_a = insertelement <2 x double> %src128, double %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_1 = insertelement <2 x double> %src128, double %val, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_a = insertelement <4 x double> %src256, double %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_3 = insertelement <4 x double> %src256, double %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2f64_a = insertelement <2 x double> %src128, double %val, i32 %arg
+  %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
+  %v2f64_1 = insertelement <2 x double> %src128, double %val, i32 1
+
+  %v4f64_a = insertelement <4 x double> %src256, double %val, i32 %arg
+  %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
+  %v4f64_3 = insertelement <4 x double> %src256, double %val, i32 3
+
+  ret i32 poison
+}
+
+; Cost-model expectations for insertelement of a float into <2 x float>,
+; <4 x float> and <8 x float>, using both a variable index (%arg) and constant
+; indices. Every insert is expected to cost 1 under both LSX and LASX. %src512
+; is not used by the body. The return value is irrelevant to the test, so
+; poison is returned.
+define i32 @insert_float(i32 %arg, float %val, <2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {
+; LSX-LABEL: 'insert_float'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_float'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
+  %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+  %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
+
+  %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
+  %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+  %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
+
+  %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
+  %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+  %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
+  %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+  %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
+
+  ret i32 poison
+}
+
+; Cost-model expectations for insertelement of an i64 into <2 x i64> and
+; <4 x i64>, using both a variable index (%arg) and constant indices. The LSX
+; and LASX expectations differ only for <4 x i64> lane 3, which LASX reports
+; as more expensive than the other constant lanes. %src512 is not used by the
+; body. The return value is irrelevant to the test, so poison is returned.
+define i32 @insert_i64(i32 %arg, i64 %val, <2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) {
+; LSX-LABEL: 'insert_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_a = insertelement <2 x i64> %src128, i64 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_0 = insertelement <2 x i64> %src128, i64 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_1 = insertelement <2 x i64> %src128, i64 %val, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_a = insertelement <4 x i64> %src256, i64 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i64_0 = insertelement <4 x i64> %src256, i64 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i64_3 = insertelement <4 x i64> %src256, i64 %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_a = insertelement <2 x i64> %src128, i64 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_0 = insertelement <2 x i64> %src128, i64 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i64_1 = insertelement <2 x i64> %src128, i64 %val, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_a = insertelement <4 x i64> %src256, i64 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i64_0 = insertelement <4 x i64> %src256, i64 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v4i64_3 = insertelement <4 x i64> %src256, i64 %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i64_a = insertelement <2 x i64> %src128, i64 %val, i32 %arg
+  %v2i64_0 = insertelement <2 x i64> %src128, i64 %val, i32 0
+  %v2i64_1 = insertelement <2 x i64> %src128, i64 %val, i32 1
+
+  %v4i64_a = insertelement <4 x i64> %src256, i64 %val, i32 %arg
+  %v4i64_0 = insertelement <4 x i64> %src256, i64 %val, i32 0
+  %v4i64_3 = insertelement <4 x i64> %src256, i64 %val, i32 3
+
+  ret i32 poison
+}
+
+; Cost-model expectations for insertelement of an i32 into <2 x i32>,
+; <4 x i32> and <8 x i32>, using both a variable index (%arg) and constant
+; indices. The LSX and LASX expectations differ only for <8 x i32> lanes 4
+; and 7, which LASX reports as more expensive than the other constant lanes.
+; %src512 is not used by the body. The return value is irrelevant to the
+; test, so poison is returned.
+define i32 @insert_i32(i32 %arg, i32 %val, <2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) {
+; LSX-LABEL: 'insert_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_a = insertelement <2 x i32> %src64, i32 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_0 = insertelement <2 x i32> %src64, i32 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_1 = insertelement <2 x i32> %src64, i32 %val, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_a = insertelement <4 x i32> %src128, i32 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_0 = insertelement <4 x i32> %src128, i32 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_3 = insertelement <4 x i32> %src128, i32 %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_a = insertelement <8 x i32> %src256, i32 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_0 = insertelement <8 x i32> %src256, i32 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_3 = insertelement <8 x i32> %src256, i32 %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_4 = insertelement <8 x i32> %src256, i32 %val, i32 4
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_7 = insertelement <8 x i32> %src256, i32 %val, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_a = insertelement <2 x i32> %src64, i32 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_0 = insertelement <2 x i32> %src64, i32 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i32_1 = insertelement <2 x i32> %src64, i32 %val, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_a = insertelement <4 x i32> %src128, i32 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_0 = insertelement <4 x i32> %src128, i32 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i32_3 = insertelement <4 x i32> %src128, i32 %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_a = insertelement <8 x i32> %src256, i32 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_0 = insertelement <8 x i32> %src256, i32 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i32_3 = insertelement <8 x i32> %src256, i32 %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v8i32_4 = insertelement <8 x i32> %src256, i32 %val, i32 4
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v8i32_7 = insertelement <8 x i32> %src256, i32 %val, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i32_a = insertelement <2 x i32> %src64, i32 %val, i32 %arg
+  %v2i32_0 = insertelement <2 x i32> %src64, i32 %val, i32 0
+  %v2i32_1 = insertelement <2 x i32> %src64, i32 %val, i32 1
+
+  %v4i32_a = insertelement <4 x i32> %src128, i32 %val, i32 %arg
+  %v4i32_0 = insertelement <4 x i32> %src128, i32 %val, i32 0
+  %v4i32_3 = insertelement <4 x i32> %src128, i32 %val, i32 3
+
+  %v8i32_a = insertelement <8 x i32> %src256, i32 %val, i32 %arg
+  %v8i32_0 = insertelement <8 x i32> %src256, i32 %val, i32 0
+  %v8i32_3 = insertelement <8 x i32> %src256, i32 %val, i32 3
+  %v8i32_4 = insertelement <8 x i32> %src256, i32 %val, i32 4
+  %v8i32_7 = insertelement <8 x i32> %src256, i32 %val, i32 7
+
+  ret i32 poison
+}
+
+; Cost-model expectations for insertelement of an i16 into <2 x i16> through
+; <16 x i16>, using both a variable index (%arg) and constant indices. The LSX
+; and LASX expectations differ only for <16 x i16> lanes 8 and 15, which LASX
+; reports as more expensive than the other constant lanes. %src512 is not used
+; by the body. The return value is irrelevant to the test, so poison is
+; returned.
+define i32 @insert_i16(i32 %arg, i16 %val, <2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) {
+; LSX-LABEL: 'insert_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_a = insertelement <2 x i16> %src32, i16 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_0 = insertelement <2 x i16> %src32, i16 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_1 = insertelement <2 x i16> %src32, i16 %val, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_a = insertelement <4 x i16> %src64, i16 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_0 = insertelement <4 x i16> %src64, i16 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_3 = insertelement <4 x i16> %src64, i16 %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_a = insertelement <8 x i16> %src128, i16 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_0 = insertelement <8 x i16> %src128, i16 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_7 = insertelement <8 x i16> %src128, i16 %val, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_a = insertelement <16 x i16> %src256, i16 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_0 = insertelement <16 x i16> %src256, i16 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_7 = insertelement <16 x i16> %src256, i16 %val, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_8 = insertelement <16 x i16> %src256, i16 %val, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_15 = insertelement <16 x i16> %src256, i16 %val, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_a = insertelement <2 x i16> %src32, i16 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_0 = insertelement <2 x i16> %src32, i16 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i16_1 = insertelement <2 x i16> %src32, i16 %val, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_a = insertelement <4 x i16> %src64, i16 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_0 = insertelement <4 x i16> %src64, i16 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i16_3 = insertelement <4 x i16> %src64, i16 %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_a = insertelement <8 x i16> %src128, i16 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_0 = insertelement <8 x i16> %src128, i16 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i16_7 = insertelement <8 x i16> %src128, i16 %val, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_a = insertelement <16 x i16> %src256, i16 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_0 = insertelement <16 x i16> %src256, i16 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i16_7 = insertelement <16 x i16> %src256, i16 %val, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v16i16_8 = insertelement <16 x i16> %src256, i16 %val, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v16i16_15 = insertelement <16 x i16> %src256, i16 %val, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i16_a = insertelement <2 x i16> %src32, i16 %val, i32 %arg
+  %v2i16_0 = insertelement <2 x i16> %src32, i16 %val, i32 0
+  %v2i16_1 = insertelement <2 x i16> %src32, i16 %val, i32 1
+
+  %v4i16_a = insertelement <4 x i16> %src64, i16 %val, i32 %arg
+  %v4i16_0 = insertelement <4 x i16> %src64, i16 %val, i32 0
+  %v4i16_3 = insertelement <4 x i16> %src64, i16 %val, i32 3
+
+  %v8i16_a = insertelement <8 x i16> %src128, i16 %val, i32 %arg
+  %v8i16_0 = insertelement <8 x i16> %src128, i16 %val, i32 0
+  %v8i16_7 = insertelement <8 x i16> %src128, i16 %val, i32 7
+
+  %v16i16_a  = insertelement <16 x i16> %src256, i16 %val, i32 %arg
+  %v16i16_0  = insertelement <16 x i16> %src256, i16 %val, i32 0
+  %v16i16_7  = insertelement <16 x i16> %src256, i16 %val, i32 7
+  %v16i16_8  = insertelement <16 x i16> %src256, i16 %val, i32 8
+  %v16i16_15 = insertelement <16 x i16> %src256, i16 %val, i32 15
+
+  ret i32 poison
+}
+
+define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
+; LSX-LABEL: 'insert_i8'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_0 = insertelement <4 x i8> %src32, i8 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_3 = insertelement <4 x i8> %src32, i8 %val, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_a = insertelement <8 x i8> %src64, i8 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_0 = insertelement <8 x i8> %src64, i8 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_7 = insertelement <8 x i8> %src64, i8 %val, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_a = insertelement <16 x i8> %src128, i8 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_a = insertelement <32 x i8> %src256, i8 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; LASX-LABEL: 'insert_i8'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_a = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_0 = insertelement <2 x i8> %src16, i8 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i8_3 = insertelement <2 x i8> %src16, i8 %val, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_a = insertelement <4 x i8> %src32, i8 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_0 = insertelement <4 x i8> %src32, i8 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i8_3 = insertelement <4 x i8> %src32, i8 %val, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_a = insertelement <8 x i8> %src64, i8 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_0 = insertelement <8 x i8> %src64, i8 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i8_7 = insertelement <8 x i8> %src64, i8 %val, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_a = insertelement <16 x i8> %src128, i8 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_a = insertelement <32 x i8> %src256, i8 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+  %v2i8_a   = insertelement <2 x i8> %src16, i8 %val, i32 %arg
+  %v2i8_0   = insertelement <2 x i8> %src16, i8 %val, i32 0
+  %v2i8_3   = insertelement <2 x i8> %src16, i8 %val, i32 1
+
+  %v4i8_a   = insertelement <4 x i8> %src32, i8 %val, i32 %arg
+  %v4i8_0   = insertelement <4 x i8> %src32, i8 %val, i32 0
+  %v4i8_3   = insertelement <4 x i8> %src32, i8 %val, i32 3
+
+  %v8i8_a   = insertelement <8 x i8> %src64, i8 %val, i32 %arg
+  %v8i8_0   = insertelement <8 x i8> %src64, i8 %val, i32 0
+  %v8i8_7   = insertelement <8 x i8> %src64, i8 %val, i32 7
+
+  %v16i8_a  = insertelement <16 x i8> %src128, i8 %val, i32 %arg
+  %v16i8_0  = insertelement <16 x i8> %src128, i8 %val, i32 0
+  %v16i8_8  = insertelement <16 x i8> %src128, i8 %val, i32 8
+  %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15
+
+  %v32i8_a  = insertelement <32 x i8> %src256, i8 %val, i32 %arg
+  %v32i8_0  = insertelement <32 x i8> %src256, i8 %val, i32 0
+  %v32i8_7  = insertelement <32 x i8> %src256, i8 %val, i32 7
+  %v32i8_8  = insertelement <32 x i8> %src256, i8 %val, i32 8
+  %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15
+  %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24
+  %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31
+
+  ret i32 undef
+}
+
+define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> %src16, <32 x i1> %src32, <64 x i1> %src64) { ; i1 insertelement into caller-supplied vectors; %src64 is declared but never used in the body
+; LSX-LABEL: 'insert_i1'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; LASX-LABEL: 'insert_i1'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_a = insertelement <2 x i1> %src2, i1 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_0 = insertelement <2 x i1> %src2, i1 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v2i1_1 = insertelement <2 x i1> %src2, i1 %val, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_a = insertelement <4 x i1> %src4, i1 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_0 = insertelement <4 x i1> %src4, i1 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v4i1_2 = insertelement <4 x i1> %src4, i1 %val, i32 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_a = insertelement <8 x i1> %src8, i1 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24
+; LASX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:3 for: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+  %v2i1_a  = insertelement <2 x i1> %src2, i1 %val, i32 %arg ; non-constant index (%arg)
+  %v2i1_0  = insertelement <2 x i1> %src2, i1 %val, i32 0 ; first element
+  %v2i1_1  = insertelement <2 x i1> %src2, i1 %val, i32 1 ; last element
+
+  %v4i1_a  = insertelement <4 x i1> %src4, i1 %val, i32 %arg ; non-constant index (%arg)
+  %v4i1_0  = insertelement <4 x i1> %src4, i1 %val, i32 0
+  %v4i1_2  = insertelement <4 x i1> %src4, i1 %val, i32 2
+
+  %v8i1_a  = insertelement <8 x i1> %src8, i1 %val, i32 %arg ; non-constant index (%arg)
+  %v8i1_0  = insertelement <8 x i1> %src8, i1 %val, i32 0
+  %v8i1_4  = insertelement <8 x i1> %src8, i1 %val, i32 4
+
+  %v16i1_a  = insertelement <16 x i1> %src16, i1 %val, i32 %arg ; non-constant index (%arg)
+  %v16i1_0  = insertelement <16 x i1> %src16, i1 %val, i32 0
+  %v16i1_8  = insertelement <16 x i1> %src16, i1 %val, i32 8
+  %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15 ; last element
+
+  %v32i1_a  = insertelement <32 x i1> %src32, i1 %val, i32 %arg ; non-constant index (%arg)
+  %v32i1_0  = insertelement <32 x i1> %src32, i1 %val, i32 0
+  %v32i1_7  = insertelement <32 x i1> %src32, i1 %val, i32 7
+  %v32i1_8  = insertelement <32 x i1> %src32, i1 %val, i32 8
+  %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15
+  %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24 ; LASX checks above price this higher than indices 0 to 15
+  %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31 ; last element, same higher LASX cost as index 24
+
+  ret i32 undef
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/vector-insert.ll b/llvm/test/Analysis/CostModel/LoongArch/vector-insert.ll
new file mode 100644
index 0000000000000..452fa4c8584e0
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/vector-insert.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; Cost of insertelement on poison vector and scalar operands, printed for every cost kind under LSX and under LASX.
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define i32 @insert_double(i32 %arg) { ; double insertelement into poison <2 x double> and <4 x double>
+; LSX-LABEL: 'insert_double'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_a = insertelement <2 x double> poison, double poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_0 = insertelement <2 x double> poison, double poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_1 = insertelement <2 x double> poison, double poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_a = insertelement <4 x double> poison, double poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_0 = insertelement <4 x double> poison, double poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_3 = insertelement <4 x double> poison, double poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_double'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_a = insertelement <2 x double> poison, double poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_0 = insertelement <2 x double> poison, double poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f64_1 = insertelement <2 x double> poison, double poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_a = insertelement <4 x double> poison, double poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_0 = insertelement <4 x double> poison, double poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f64_3 = insertelement <4 x double> poison, double poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2f64_a = insertelement <2 x double> poison, double poison, i32 %arg ; non-constant index (%arg)
+  %v2f64_0 = insertelement <2 x double> poison, double poison, i32 0 ; first element
+  %v2f64_1 = insertelement <2 x double> poison, double poison, i32 1 ; last element
+
+  %v4f64_a = insertelement <4 x double> poison, double poison, i32 %arg ; non-constant index (%arg)
+  %v4f64_0 = insertelement <4 x double> poison, double poison, i32 0 ; first element
+  %v4f64_3 = insertelement <4 x double> poison, double poison, i32 3 ; last element, still cost 1 in both check blocks above
+
+  ret i32 poison
+}
+
+define i32 @insert_float(i32 %arg) { ; float insertelement into poison vectors of 2, 4 and 8 elements
+; LSX-LABEL: 'insert_float'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_a = insertelement <2 x float> poison, float poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_0 = insertelement <2 x float> poison, float poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_1 = insertelement <2 x float> poison, float poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_a = insertelement <4 x float> poison, float poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_0 = insertelement <4 x float> poison, float poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_3 = insertelement <4 x float> poison, float poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_a = insertelement <8 x float> poison, float poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_0 = insertelement <8 x float> poison, float poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_3 = insertelement <8 x float> poison, float poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_4 = insertelement <8 x float> poison, float poison, i32 4
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_7 = insertelement <8 x float> poison, float poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_float'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_a = insertelement <2 x float> poison, float poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_0 = insertelement <2 x float> poison, float poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2f32_1 = insertelement <2 x float> poison, float poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_a = insertelement <4 x float> poison, float poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_0 = insertelement <4 x float> poison, float poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4f32_3 = insertelement <4 x float> poison, float poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_a = insertelement <8 x float> poison, float poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_0 = insertelement <8 x float> poison, float poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_3 = insertelement <8 x float> poison, float poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_4 = insertelement <8 x float> poison, float poison, i32 4
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8f32_7 = insertelement <8 x float> poison, float poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2f32_a = insertelement <2 x float> poison, float poison, i32 %arg ; non-constant index (%arg)
+  %v2f32_0 = insertelement <2 x float> poison, float poison, i32 0 ; first element
+  %v2f32_1 = insertelement <2 x float> poison, float poison, i32 1 ; last element
+
+  %v4f32_a = insertelement <4 x float> poison, float poison, i32 %arg ; non-constant index (%arg)
+  %v4f32_0 = insertelement <4 x float> poison, float poison, i32 0 ; first element
+  %v4f32_3 = insertelement <4 x float> poison, float poison, i32 3 ; last element
+
+  %v8f32_a = insertelement <8 x float> poison, float poison, i32 %arg ; non-constant index (%arg)
+  %v8f32_0 = insertelement <8 x float> poison, float poison, i32 0 ; first element
+  %v8f32_3 = insertelement <8 x float> poison, float poison, i32 3 ; last element of the lower four
+  %v8f32_4 = insertelement <8 x float> poison, float poison, i32 4 ; first element of the upper four, still cost 1 in both check blocks above
+  %v8f32_7 = insertelement <8 x float> poison, float poison, i32 7 ; last element, still cost 1 in both check blocks above
+
+  ret i32 poison
+}
+
+define i32 @insert_i64(i32 %arg) { ; i64 insertelement into poison <2 x i64> and <4 x i64>
+; LSX-LABEL: 'insert_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_a = insertelement <2 x i64> poison, i64 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_0 = insertelement <2 x i64> poison, i64 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_1 = insertelement <2 x i64> poison, i64 poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_a = insertelement <4 x i64> poison, i64 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_0 = insertelement <4 x i64> poison, i64 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_3 = insertelement <4 x i64> poison, i64 poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_a = insertelement <2 x i64> poison, i64 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_0 = insertelement <2 x i64> poison, i64 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i64_1 = insertelement <2 x i64> poison, i64 poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_a = insertelement <4 x i64> poison, i64 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i64_0 = insertelement <4 x i64> poison, i64 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v4i64_3 = insertelement <4 x i64> poison, i64 poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i64_a = insertelement <2 x i64> poison, i64 poison, i32 %arg ; non-constant index (%arg)
+  %v2i64_0 = insertelement <2 x i64> poison, i64 poison, i32 0 ; first element
+  %v2i64_1 = insertelement <2 x i64> poison, i64 poison, i32 1 ; last element
+
+  %v4i64_a = insertelement <4 x i64> poison, i64 poison, i32 %arg ; non-constant index (%arg)
+  %v4i64_0 = insertelement <4 x i64> poison, i64 poison, i32 0 ; first element
+  %v4i64_3 = insertelement <4 x i64> poison, i64 poison, i32 3 ; last element, LASX checks above report 3 where LSX checks report 1
+
+  ret i32 poison
+}
+
+define i32 @insert_i32(i32 %arg) { ; i32 insertelement into poison vectors of 2, 4 and 8 elements
+; LSX-LABEL: 'insert_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_a = insertelement <2 x i32> poison, i32 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_0 = insertelement <2 x i32> poison, i32 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_1 = insertelement <2 x i32> poison, i32 poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_a = insertelement <4 x i32> poison, i32 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_0 = insertelement <4 x i32> poison, i32 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_3 = insertelement <4 x i32> poison, i32 poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_a = insertelement <8 x i32> poison, i32 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_0 = insertelement <8 x i32> poison, i32 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_3 = insertelement <8 x i32> poison, i32 poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_4 = insertelement <8 x i32> poison, i32 poison, i32 4
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_7 = insertelement <8 x i32> poison, i32 poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_a = insertelement <2 x i32> poison, i32 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_0 = insertelement <2 x i32> poison, i32 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i32_1 = insertelement <2 x i32> poison, i32 poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_a = insertelement <4 x i32> poison, i32 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_0 = insertelement <4 x i32> poison, i32 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i32_3 = insertelement <4 x i32> poison, i32 poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_a = insertelement <8 x i32> poison, i32 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_0 = insertelement <8 x i32> poison, i32 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i32_3 = insertelement <8 x i32> poison, i32 poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v8i32_4 = insertelement <8 x i32> poison, i32 poison, i32 4
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v8i32_7 = insertelement <8 x i32> poison, i32 poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i32_a = insertelement <2 x i32> poison, i32 poison, i32 %arg ; non-constant index (%arg)
+  %v2i32_0 = insertelement <2 x i32> poison, i32 poison, i32 0 ; first element
+  %v2i32_1 = insertelement <2 x i32> poison, i32 poison, i32 1 ; last element
+
+  %v4i32_a = insertelement <4 x i32> poison, i32 poison, i32 %arg ; non-constant index (%arg)
+  %v4i32_0 = insertelement <4 x i32> poison, i32 poison, i32 0 ; first element
+  %v4i32_3 = insertelement <4 x i32> poison, i32 poison, i32 3 ; last element
+
+  %v8i32_a = insertelement <8 x i32> poison, i32 poison, i32 %arg ; non-constant index (%arg)
+  %v8i32_0 = insertelement <8 x i32> poison, i32 poison, i32 0 ; first element
+  %v8i32_3 = insertelement <8 x i32> poison, i32 poison, i32 3 ; last element of the lower four
+  %v8i32_4 = insertelement <8 x i32> poison, i32 poison, i32 4 ; first element of the upper four, LASX checks above report 3 where LSX checks report 1
+  %v8i32_7 = insertelement <8 x i32> poison, i32 poison, i32 7 ; last element, same LASX cost of 3 as index 4
+
+  ret i32 poison
+}
+
+define i32 @insert_i16(i32 %arg) { ; i16 insertelement into poison vectors of 2, 4, 8 and 16 elements
+; LSX-LABEL: 'insert_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_a = insertelement <2 x i16> poison, i16 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_0 = insertelement <2 x i16> poison, i16 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_1 = insertelement <2 x i16> poison, i16 poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_a = insertelement <4 x i16> poison, i16 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_0 = insertelement <4 x i16> poison, i16 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_3 = insertelement <4 x i16> poison, i16 poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_a = insertelement <8 x i16> poison, i16 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_0 = insertelement <8 x i16> poison, i16 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_7 = insertelement <8 x i16> poison, i16 poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_a = insertelement <16 x i16> poison, i16 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_0 = insertelement <16 x i16> poison, i16 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_7 = insertelement <16 x i16> poison, i16 poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_8 = insertelement <16 x i16> poison, i16 poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_15 = insertelement <16 x i16> poison, i16 poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_a = insertelement <2 x i16> poison, i16 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_0 = insertelement <2 x i16> poison, i16 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i16_1 = insertelement <2 x i16> poison, i16 poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_a = insertelement <4 x i16> poison, i16 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_0 = insertelement <4 x i16> poison, i16 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i16_3 = insertelement <4 x i16> poison, i16 poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_a = insertelement <8 x i16> poison, i16 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_0 = insertelement <8 x i16> poison, i16 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i16_7 = insertelement <8 x i16> poison, i16 poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_a = insertelement <16 x i16> poison, i16 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_0 = insertelement <16 x i16> poison, i16 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i16_7 = insertelement <16 x i16> poison, i16 poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v16i16_8 = insertelement <16 x i16> poison, i16 poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v16i16_15 = insertelement <16 x i16> poison, i16 poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i16_a = insertelement <2 x i16> poison, i16 poison, i32 %arg ; non-constant index (%arg)
+  %v2i16_0 = insertelement <2 x i16> poison, i16 poison, i32 0 ; first element
+  %v2i16_1 = insertelement <2 x i16> poison, i16 poison, i32 1 ; last element
+
+  %v4i16_a = insertelement <4 x i16> poison, i16 poison, i32 %arg ; non-constant index (%arg)
+  %v4i16_0 = insertelement <4 x i16> poison, i16 poison, i32 0 ; first element
+  %v4i16_3 = insertelement <4 x i16> poison, i16 poison, i32 3 ; last element
+
+  %v8i16_a = insertelement <8 x i16> poison, i16 poison, i32 %arg ; non-constant index (%arg)
+  %v8i16_0 = insertelement <8 x i16> poison, i16 poison, i32 0 ; first element
+  %v8i16_7 = insertelement <8 x i16> poison, i16 poison, i32 7 ; last element
+
+  %v16i16_a  = insertelement <16 x i16> poison, i16 poison, i32 %arg ; non-constant index (%arg)
+  %v16i16_0  = insertelement <16 x i16> poison, i16 poison, i32 0 ; first element
+  %v16i16_7  = insertelement <16 x i16> poison, i16 poison, i32 7 ; last element of the lower eight
+  %v16i16_8  = insertelement <16 x i16> poison, i16 poison, i32 8 ; first element of the upper eight, LASX checks above report 3 where LSX checks report 1
+  %v16i16_15 = insertelement <16 x i16> poison, i16 poison, i32 15 ; last element, same LASX cost of 3 as index 8
+
+  ret i32 poison
+}
+
+define i32 @insert_i8(i32 %arg) { ; i8 insertelement into poison vectors of 2, 4, 8, 16 and 32 elements
+; LSX-LABEL: 'insert_i8'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_a = insertelement <2 x i8> poison, i8 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_0 = insertelement <2 x i8> poison, i8 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_1 = insertelement <2 x i8> poison, i8 poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_a = insertelement <4 x i8> poison, i8 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_0 = insertelement <4 x i8> poison, i8 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_3 = insertelement <4 x i8> poison, i8 poison, i32 3
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_a = insertelement <8 x i8> poison, i8 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_0 = insertelement <8 x i8> poison, i8 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_7 = insertelement <8 x i8> poison, i8 poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_a = insertelement <16 x i8> poison, i8 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_0 = insertelement <16 x i8> poison, i8 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_8 = insertelement <16 x i8> poison, i8 poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_15 = insertelement <16 x i8> poison, i8 poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_a = insertelement <32 x i8> poison, i8 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_0 = insertelement <32 x i8> poison, i8 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_7 = insertelement <32 x i8> poison, i8 poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_8 = insertelement <32 x i8> poison, i8 poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_15 = insertelement <32 x i8> poison, i8 poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_24 = insertelement <32 x i8> poison, i8 poison, i32 24
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_31 = insertelement <32 x i8> poison, i8 poison, i32 31
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i8'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_a = insertelement <2 x i8> poison, i8 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_0 = insertelement <2 x i8> poison, i8 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i8_1 = insertelement <2 x i8> poison, i8 poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_a = insertelement <4 x i8> poison, i8 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_0 = insertelement <4 x i8> poison, i8 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i8_3 = insertelement <4 x i8> poison, i8 poison, i32 3
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_a = insertelement <8 x i8> poison, i8 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_0 = insertelement <8 x i8> poison, i8 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i8_7 = insertelement <8 x i8> poison, i8 poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_a = insertelement <16 x i8> poison, i8 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_0 = insertelement <16 x i8> poison, i8 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_8 = insertelement <16 x i8> poison, i8 poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i8_15 = insertelement <16 x i8> poison, i8 poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_a = insertelement <32 x i8> poison, i8 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_0 = insertelement <32 x i8> poison, i8 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_7 = insertelement <32 x i8> poison, i8 poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_8 = insertelement <32 x i8> poison, i8 poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i8_15 = insertelement <32 x i8> poison, i8 poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v32i8_24 = insertelement <32 x i8> poison, i8 poison, i32 24
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v32i8_31 = insertelement <32 x i8> poison, i8 poison, i32 31
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i8_a   = insertelement <2 x i8> poison, i8 poison, i32 %arg ; non-constant index (%arg)
+  %v2i8_0   = insertelement <2 x i8> poison, i8 poison, i32 0 ; first element
+  %v2i8_1   = insertelement <2 x i8> poison, i8 poison, i32 1 ; last element, value name carries the inserted index like every other value here
+
+  %v4i8_a   = insertelement <4 x i8> poison, i8 poison, i32 %arg ; non-constant index (%arg)
+  %v4i8_0   = insertelement <4 x i8> poison, i8 poison, i32 0 ; first element
+  %v4i8_3   = insertelement <4 x i8> poison, i8 poison, i32 3 ; last element
+
+  %v8i8_a   = insertelement <8 x i8> poison, i8 poison, i32 %arg ; non-constant index (%arg)
+  %v8i8_0   = insertelement <8 x i8> poison, i8 poison, i32 0 ; first element
+  %v8i8_7   = insertelement <8 x i8> poison, i8 poison, i32 7 ; last element
+
+  %v16i8_a  = insertelement <16 x i8> poison, i8 poison, i32 %arg ; non-constant index (%arg)
+  %v16i8_0  = insertelement <16 x i8> poison, i8 poison, i32 0 ; first element
+  %v16i8_8  = insertelement <16 x i8> poison, i8 poison, i32 8
+  %v16i8_15 = insertelement <16 x i8> poison, i8 poison, i32 15 ; last element
+
+  %v32i8_a  = insertelement <32 x i8> poison, i8 poison, i32 %arg ; non-constant index (%arg)
+  %v32i8_0  = insertelement <32 x i8> poison, i8 poison, i32 0 ; first element
+  %v32i8_7  = insertelement <32 x i8> poison, i8 poison, i32 7
+  %v32i8_8  = insertelement <32 x i8> poison, i8 poison, i32 8
+  %v32i8_15 = insertelement <32 x i8> poison, i8 poison, i32 15
+  %v32i8_24 = insertelement <32 x i8> poison, i8 poison, i32 24 ; LASX checks above report 3 where LSX checks report 1
+  %v32i8_31 = insertelement <32 x i8> poison, i8 poison, i32 31 ; last element, same LASX cost of 3 as index 24
+
+  ret i32 poison
+}
+
+define i32 @insert_i1(i32 %arg) {
+; LSX-LABEL: 'insert_i1'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_a = insertelement <2 x i1> poison, i1 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_0 = insertelement <2 x i1> poison, i1 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_1 = insertelement <2 x i1> poison, i1 poison, i32 1
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_a = insertelement <4 x i1> poison, i1 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_0 = insertelement <4 x i1> poison, i1 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_2 = insertelement <4 x i1> poison, i1 poison, i32 2
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_a = insertelement <8 x i1> poison, i1 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_0 = insertelement <8 x i1> poison, i1 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_4 = insertelement <8 x i1> poison, i1 poison, i32 4
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_a = insertelement <16 x i1> poison, i1 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_0 = insertelement <16 x i1> poison, i1 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_8 = insertelement <16 x i1> poison, i1 poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_15 = insertelement <16 x i1> poison, i1 poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_a = insertelement <32 x i1> poison, i1 poison, i32 %arg
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_0 = insertelement <32 x i1> poison, i1 poison, i32 0
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_7 = insertelement <32 x i1> poison, i1 poison, i32 7
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_8 = insertelement <32 x i1> poison, i1 poison, i32 8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_15 = insertelement <32 x i1> poison, i1 poison, i32 15
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_24 = insertelement <32 x i1> poison, i1 poison, i32 24
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_31 = insertelement <32 x i1> poison, i1 poison, i32 31
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'insert_i1'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_a = insertelement <2 x i1> poison, i1 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_0 = insertelement <2 x i1> poison, i1 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v2i1_1 = insertelement <2 x i1> poison, i1 poison, i32 1
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_a = insertelement <4 x i1> poison, i1 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_0 = insertelement <4 x i1> poison, i1 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v4i1_2 = insertelement <4 x i1> poison, i1 poison, i32 2
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_a = insertelement <8 x i1> poison, i1 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_0 = insertelement <8 x i1> poison, i1 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v8i1_4 = insertelement <8 x i1> poison, i1 poison, i32 4
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_a = insertelement <16 x i1> poison, i1 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_0 = insertelement <16 x i1> poison, i1 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_8 = insertelement <16 x i1> poison, i1 poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v16i1_15 = insertelement <16 x i1> poison, i1 poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_a = insertelement <32 x i1> poison, i1 poison, i32 %arg
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_0 = insertelement <32 x i1> poison, i1 poison, i32 0
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_7 = insertelement <32 x i1> poison, i1 poison, i32 7
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_8 = insertelement <32 x i1> poison, i1 poison, i32 8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %v32i1_15 = insertelement <32 x i1> poison, i1 poison, i32 15
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v32i1_24 = insertelement <32 x i1> poison, i1 poison, i32 24
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %v32i1_31 = insertelement <32 x i1> poison, i1 poison, i32 31
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %v2i1_a  = insertelement <2 x i1> poison, i1 poison, i32 %arg
+  %v2i1_0  = insertelement <2 x i1> poison, i1 poison, i32 0
+  %v2i1_1  = insertelement <2 x i1> poison, i1 poison, i32 1
+
+  %v4i1_a  = insertelement <4 x i1> poison, i1 poison, i32 %arg
+  %v4i1_0  = insertelement <4 x i1> poison, i1 poison, i32 0
+  %v4i1_2  = insertelement <4 x i1> poison, i1 poison, i32 2
+
+  %v8i1_a  = insertelement <8 x i1> poison, i1 poison, i32 %arg
+  %v8i1_0  = insertelement <8 x i1> poison, i1 poison, i32 0
+  %v8i1_4  = insertelement <8 x i1> poison, i1 poison, i32 4
+
+  %v16i1_a  = insertelement <16 x i1> poison, i1 poison, i32 %arg
+  %v16i1_0  = insertelement <16 x i1> poison, i1 poison, i32 0
+  %v16i1_8  = insertelement <16 x i1> poison, i1 poison, i32 8
+  %v16i1_15 = insertelement <16 x i1> poison, i1 poison, i32 15
+
+  %v32i1_a  = insertelement <32 x i1> poison, i1 poison, i32 %arg
+  %v32i1_0  = insertelement <32 x i1> poison, i1 poison, i32 0
+  %v32i1_7  = insertelement <32 x i1> poison, i1 poison, i32 7
+  %v32i1_8  = insertelement <32 x i1> poison, i1 poison, i32 8
+  %v32i1_15 = insertelement <32 x i1> poison, i1 poison, i32 15
+  %v32i1_24 = insertelement <32 x i1> poison, i1 poison, i32 24
+  %v32i1_31 = insertelement <32 x i1> poison, i1 poison, i32 31
+
+  ret i32 poison
+}
+

>From c1ef77b6f8c2dcb3b04f9a5d1dbeda3461081500 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 29 Oct 2025 16:51:58 +0800
Subject: [PATCH 3/8] Support getMemoryOpCost

---
 .../llvm/Analysis/TargetTransformInfoImpl.h   |   3 -
 .../LoongArchTargetTransformInfo.cpp          |  25 +++
 .../LoongArch/LoongArchTargetTransformInfo.h  |   5 +
 .../CostModel/LoongArch/load-store.ll         | 144 ++++++++++++++++++
 4 files changed, 174 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/load-store.ll

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 74857a5b83aba..731fe7c441ad2 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1469,9 +1469,6 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
                                         OpInfo, I);
     }
     case Instruction::Load: {
-      // FIXME: Arbitary cost which could come from the backend.
-      if (CostKind == TTI::TCK_Latency)
-        return 4;
       auto *LI = cast<LoadInst>(U);
       Type *LoadType = U->getType();
       // If there is a non-register sized type, the cost estimation may expand
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 1f5eb4792d9d6..5cca9b731c93a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -531,4 +531,29 @@ InstructionCost LoongArchTTIImpl::getVectorInstrCost(
          RegisterFileMoveCost;
 }
 
+/// Cost of a load or store of \p Src. Scaled by the type-legalization cost,
+/// reciprocal throughput is 2 and latency is 4 (5 for floating-point or
+/// vector types); every other cost kind defers to BaseT.
+InstructionCost LoongArchTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                                  Align Alignment,
+                                                  unsigned AddressSpace,
+                                                  TTI::TargetCostKind CostKind,
+                                                  TTI::OperandValueInfo OpInfo,
+                                                  const Instruction *I) const {
+  // Legalize the type; LT.first scales the per-access costs below.
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
+
+  switch (CostKind) {
+  default:
+    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                  CostKind, OpInfo, I);
+  case TTI::TCK_RecipThroughput:
+    return 2 * LT.first;
+  case TTI::TCK_Latency: {
+    unsigned Cost = (Src->isFloatingPointTy() || Src->isVectorTy()) ? 5 : 4;
+    return Cost * LT.first;
+  }
+  }
+}
+
 // TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 8d0c05ddd8b6a..0fe96558b65a2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -66,6 +66,11 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
                                      TTI::TargetCostKind CostKind,
                                      unsigned Index, const Value *Op0,
                                      const Value *Op1) const override;
+  InstructionCost getMemoryOpCost(
+      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind,
+      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
+      const Instruction *I = nullptr) const override;
 
   TTI::MemCmpExpansionOptions
   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
diff --git a/llvm/test/Analysis/CostModel/LoongArch/load-store.ll b/llvm/test/Analysis/CostModel/LoongArch/load-store.ll
new file mode 100644
index 0000000000000..b176d093fc5a7
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/load-store.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define i32 @store(i32 %arg) {
+; LSX-LABEL: 'store'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i8 poison, ptr poison, align 1
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i16 poison, ptr poison, align 2
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i32 poison, ptr poison, align 4
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i64 poison, ptr poison, align 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store float poison, ptr poison, align 4
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store double poison, ptr poison, align 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <16 x i8> poison, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <8 x i16> poison, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <4 x i32> poison, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <2 x i64> poison, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <4 x float> poison, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <2 x double> poison, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: store <32 x i8> poison, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: store <16 x i16> poison, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: store <8 x i32> poison, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: store <4 x i64> poison, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: store <8 x float> poison, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: store <4 x double> poison, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'store'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i8 poison, ptr poison, align 1
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i16 poison, ptr poison, align 2
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i32 poison, ptr poison, align 4
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: store i64 poison, ptr poison, align 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store float poison, ptr poison, align 4
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store double poison, ptr poison, align 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <16 x i8> poison, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <8 x i16> poison, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <4 x i32> poison, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <2 x i64> poison, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <4 x float> poison, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <2 x double> poison, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <32 x i8> poison, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <16 x i16> poison, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <8 x i32> poison, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <4 x i64> poison, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <8 x float> poison, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <4 x double> poison, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+  store i8 poison, ptr poison
+  store i16 poison, ptr poison
+  store i32 poison, ptr poison
+  store i64 poison, ptr poison
+
+  store float poison, ptr poison
+  store double poison, ptr poison
+
+  store <16 x i8> poison, ptr poison
+  store <8 x i16> poison, ptr poison
+  store <4 x i32> poison, ptr poison
+  store <2 x i64> poison, ptr poison
+
+  store <4 x float> poison, ptr poison
+  store <2 x double> poison, ptr poison
+
+  store <32 x i8> poison, ptr poison
+  store <16 x i16> poison, ptr poison
+  store <8 x i32> poison, ptr poison
+  store <4 x i64> poison, ptr poison
+
+  store <8 x float> poison, ptr poison
+  store <4 x double> poison, ptr poison
+
+  ret i32 poison
+}
+
+define i32 @load(i32 %arg) {
+; LSX-LABEL: 'load'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr poison, align 1
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr poison, align 2
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr poison, align 4
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr poison, align 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %5 = load float, ptr poison, align 4
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %6 = load double, ptr poison, align 8
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %7 = load <16 x i8>, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %8 = load <8 x i16>, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %9 = load <4 x i32>, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %10 = load <2 x i64>, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %11 = load <4 x float>, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %12 = load <2 x double>, ptr poison, align 16
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %13 = load <32 x i8>, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %14 = load <16 x i16>, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %15 = load <8 x i32>, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %16 = load <4 x i64>, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %17 = load <8 x float>, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %18 = load <4 x double>, ptr poison, align 32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+; LASX-LABEL: 'load'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr poison, align 1
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr poison, align 2
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr poison, align 4
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr poison, align 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %5 = load float, ptr poison, align 4
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %6 = load double, ptr poison, align 8
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %7 = load <16 x i8>, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %8 = load <8 x i16>, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %9 = load <4 x i32>, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %10 = load <2 x i64>, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %11 = load <4 x float>, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %12 = load <2 x double>, ptr poison, align 16
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %13 = load <32 x i8>, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %14 = load <16 x i16>, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %15 = load <8 x i32>, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %16 = load <4 x i64>, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %17 = load <8 x float>, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %18 = load <4 x double>, ptr poison, align 32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 poison
+;
+  load i8, ptr poison
+  load i16, ptr poison
+  load i32, ptr poison
+  load i64, ptr poison
+
+  load float, ptr poison
+  load double, ptr poison
+
+  load <16 x i8>, ptr poison
+  load <8 x i16>, ptr poison
+  load <4 x i32>, ptr poison
+  load <2 x i64>, ptr poison
+
+  load <4 x float>, ptr poison
+  load <2 x double>, ptr poison
+
+  load <32 x i8>, ptr poison
+  load <16 x i16>, ptr poison
+  load <8 x i32>, ptr poison
+  load <4 x i64>, ptr poison
+
+  load <8 x float>, ptr poison
+  load <4 x double>, ptr poison
+
+  ret i32 poison
+}

>From 9ad5dec27e2ae823d596f15c06937694537a6ee6 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Mon, 24 Nov 2025 11:50:23 +0800
Subject: [PATCH 4/8] make prefersVectorizedAddressing false

---
 llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 4 ++++
 llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h   | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 5cca9b731c93a..2073c7b8b53d8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -556,4 +556,8 @@ InstructionCost LoongArchTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   }
 }
 
+bool LoongArchTTIImpl::prefersVectorizedAddressing() const {
+  return false;
+}
+
 // TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 0fe96558b65a2..8b8839e41ea16 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -71,6 +71,8 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
       TTI::TargetCostKind CostKind,
       TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
       const Instruction *I = nullptr) const override;
+  
+  bool prefersVectorizedAddressing() const override;
 
   TTI::MemCmpExpansionOptions
   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;

>From 286a9ced9f397c1bd81b6898957795288b6c82d5 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 25 Nov 2025 16:02:49 +0800
Subject: [PATCH 5/8] Support getCmpSelInstrCost

---
 .../LoongArchTargetTransformInfo.cpp          |  72 ++-
 .../LoongArch/LoongArchTargetTransformInfo.h  |   9 +-
 .../test/Analysis/CostModel/LoongArch/fcmp.ll | 532 ++++++++++++++++++
 .../test/Analysis/CostModel/LoongArch/icmp.ll | 471 ++++++++++++++++
 .../Analysis/CostModel/LoongArch/select.ll    |  83 +++
 5 files changed, 1164 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/fcmp.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/icmp.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/select.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 2073c7b8b53d8..ee76750b9c292 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/Support/InstructionCost.h"
 #include <optional>
 
@@ -556,8 +557,75 @@ InstructionCost LoongArchTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   }
 }
 
-bool LoongArchTTIImpl::prefersVectorizedAddressing() const {
-  return false;
+/// Cost of a compare/select on \p ValTy: legal LSX/LASX vector types use the
+/// {Latency, RecipThroughput} tables below; all other cases defer to BaseT.
+InstructionCost LoongArchTTIImpl::getCmpSelInstrCost(
+    unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
+    TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
+    TTI::OperandValueInfo Op2Info, const Instruction *I) const {
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  MVT MTy = LT.second;
+
+  // [x]vsl{t/e}[i] needs extra cost: latency of 64-bit element vectors is
+  // bumped by one unless the predicate is an integer eq/ne.
+  InstructionCost ExtraCost = 0;
+  if (CostKind == TTI::TCK_Latency && MTy.isVector() &&
+      MTy.getScalarSizeInBits() == 64 && VecPred != CmpInst::ICMP_EQ &&
+      VecPred != CmpInst::ICMP_NE)
+    ExtraCost = 1;
+
+  static const CostKindTblEntry LSXCostTable[] = {
+      {ISD::SETCC, MVT::v16i8, {1, 1}}, // veq.b/...
+      {ISD::SETCC, MVT::v8i16, {1, 1}}, // veq.h/...
+      {ISD::SETCC, MVT::v4i32, {1, 1}}, // veq.w/...
+      {ISD::SETCC, MVT::v2i64, {1, 1}}, // veq.d/...
+
+      {ISD::SETCC, MVT::v4f32, {2, 1}}, // vfcmp.cond.s
+      {ISD::SETCC, MVT::v2f64, {2, 1}}, // vfcmp.cond.d
+
+      {ISD::SELECT, MVT::v16i8, {1, 2}}, // vbitsel.v
+      {ISD::SELECT, MVT::v8i16, {1, 2}}, // vbitsel.v
+      {ISD::SELECT, MVT::v4i32, {1, 2}}, // vbitsel.v
+      {ISD::SELECT, MVT::v2i64, {1, 2}}, // vbitsel.v
+
+      {ISD::SELECT, MVT::v4f32, {1, 2}}, // vbitsel.v
+      {ISD::SELECT, MVT::v2f64, {1, 2}}, // vbitsel.v
+  };
+
+  static const CostKindTblEntry LASXCostTable[] = {
+      {ISD::SETCC, MVT::v32i8, {1, 1}},  // xveq.b/...
+      {ISD::SETCC, MVT::v16i16, {1, 1}}, // xveq.h/...
+      {ISD::SETCC, MVT::v8i32, {1, 1}},  // xveq.w/...
+      {ISD::SETCC, MVT::v4i64, {1, 1}},  // xveq.d/...
+
+      {ISD::SETCC, MVT::v8f32, {2, 1}}, // xvfcmp.cond.s
+      {ISD::SETCC, MVT::v4f64, {2, 1}}, // xvfcmp.cond.d
+
+      {ISD::SELECT, MVT::v32i8, {1, 2}},  // xvbitsel.v
+      {ISD::SELECT, MVT::v16i16, {1, 2}}, // xvbitsel.v
+      {ISD::SELECT, MVT::v8i32, {1, 2}},  // xvbitsel.v
+      {ISD::SELECT, MVT::v4i64, {1, 2}},  // xvbitsel.v
+
+      {ISD::SELECT, MVT::v8f32, {1, 2}}, // xvbitsel.v
+      {ISD::SELECT, MVT::v4f64, {1, 2}}, // xvbitsel.v
+  };
+
+  if (ST->hasExtLSX()) {
+    if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, MTy))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * (ExtraCost + *KindCost);
+  }
+  if (ST->hasExtLASX()) {
+    if (const auto *Entry = CostTableLookup(LASXCostTable, ISD, MTy))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * (ExtraCost + *KindCost);
+  }
+
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+                                   Op1Info, Op2Info, I);
 }
 
+bool LoongArchTTIImpl::prefersVectorizedAddressing() const { return false; }
+
 // TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 8b8839e41ea16..fe8a50e878f52 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -71,7 +71,14 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
       TTI::TargetCostKind CostKind,
       TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
       const Instruction *I = nullptr) const override;
-  
+
+  InstructionCost getCmpSelInstrCost(
+      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
+      TTI::TargetCostKind CostKind,
+      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
+      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
+      const Instruction *I = nullptr) const override;
+
   bool prefersVectorizedAddressing() const override;
 
   TTI::MemCmpExpansionOptions
diff --git a/llvm/test/Analysis/CostModel/LoongArch/fcmp.ll b/llvm/test/Analysis/CostModel/LoongArch/fcmp.ll
new file mode 100644
index 0000000000000..9a03235e4b4aa
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/fcmp.ll
@@ -0,0 +1,532 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+
+define i32 @cmp_float_oeq(i32 %arg) {
+; LSX-LABEL: 'cmp_float_oeq'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp oeq float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp oeq <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp oeq <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp oeq <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp oeq double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp oeq <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp oeq <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_oeq'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp oeq float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp oeq <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp oeq <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp oeq <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp oeq double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp oeq <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp oeq <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp oeq float undef, undef
+  %V2F32 = fcmp oeq <2 x float> undef, undef
+  %V4F32 = fcmp oeq <4 x float> undef, undef
+  %V8F32 = fcmp oeq <8 x float> undef, undef
+
+  %F64 = fcmp oeq double undef, undef
+  %V2F64 = fcmp oeq <2 x double> undef, undef
+  %V4F64 = fcmp oeq <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_one(i32 %arg) {
+; LSX-LABEL: 'cmp_float_one'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp one float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp one <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp one <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp one <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp one double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp one <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp one <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_one'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp one float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp one <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp one <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp one <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp one double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp one <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp one <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp one float undef, undef
+  %V2F32 = fcmp one <2 x float> undef, undef
+  %V4F32 = fcmp one <4 x float> undef, undef
+  %V8F32 = fcmp one <8 x float> undef, undef
+
+  %F64 = fcmp one double undef, undef
+  %V2F64 = fcmp one <2 x double> undef, undef
+  %V4F64 = fcmp one <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_ord(i32 %arg) {
+; LSX-LABEL: 'cmp_float_ord'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ord float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ord <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ord <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp ord <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ord double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ord <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp ord <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_ord'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ord float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ord <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ord <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp ord <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ord double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ord <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp ord <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp ord float undef, undef
+  %V2F32 = fcmp ord <2 x float> undef, undef
+  %V4F32 = fcmp ord <4 x float> undef, undef
+  %V8F32 = fcmp ord <8 x float> undef, undef
+
+  %F64 = fcmp ord double undef, undef
+  %V2F64 = fcmp ord <2 x double> undef, undef
+  %V4F64 = fcmp ord <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_oge(i32 %arg) {
+; LSX-LABEL: 'cmp_float_oge'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp oge float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp oge <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp oge <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp oge <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp oge double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp oge <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp oge <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_oge'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp oge float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp oge <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp oge <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp oge <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp oge double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp oge <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp oge <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp oge float undef, undef
+  %V2F32 = fcmp oge <2 x float> undef, undef
+  %V4F32 = fcmp oge <4 x float> undef, undef
+  %V8F32 = fcmp oge <8 x float> undef, undef
+
+  %F64 = fcmp oge double undef, undef
+  %V2F64 = fcmp oge <2 x double> undef, undef
+  %V4F64 = fcmp oge <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_ogt(i32 %arg) {
+; LSX-LABEL: 'cmp_float_ogt'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ogt float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ogt <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ogt <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp ogt <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ogt double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ogt <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp ogt <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_ogt'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ogt float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ogt <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ogt <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp ogt <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ogt double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ogt <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp ogt <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp ogt float undef, undef
+  %V2F32 = fcmp ogt <2 x float> undef, undef
+  %V4F32 = fcmp ogt <4 x float> undef, undef
+  %V8F32 = fcmp ogt <8 x float> undef, undef
+
+  %F64 = fcmp ogt double undef, undef
+  %V2F64 = fcmp ogt <2 x double> undef, undef
+  %V4F64 = fcmp ogt <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_ole(i32 %arg) {
+; LSX-LABEL: 'cmp_float_ole'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ole float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ole <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ole <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp ole <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ole double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ole <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp ole <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_ole'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ole float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ole <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ole <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp ole <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ole double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ole <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp ole <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp ole float undef, undef
+  %V2F32 = fcmp ole <2 x float> undef, undef
+  %V4F32 = fcmp ole <4 x float> undef, undef
+  %V8F32 = fcmp ole <8 x float> undef, undef
+
+  %F64 = fcmp ole double undef, undef
+  %V2F64 = fcmp ole <2 x double> undef, undef
+  %V4F64 = fcmp ole <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_olt(i32 %arg) {
+; LSX-LABEL: 'cmp_float_olt'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp olt float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp olt <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp olt <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp olt <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp olt double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp olt <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp olt <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_olt'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp olt float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp olt <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp olt <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp olt <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp olt double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp olt <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp olt <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp olt float undef, undef
+  %V2F32 = fcmp olt <2 x float> undef, undef
+  %V4F32 = fcmp olt <4 x float> undef, undef
+  %V8F32 = fcmp olt <8 x float> undef, undef
+
+  %F64 = fcmp olt double undef, undef
+  %V2F64 = fcmp olt <2 x double> undef, undef
+  %V4F64 = fcmp olt <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_ueq(i32 %arg) {
+; LSX-LABEL: 'cmp_float_ueq'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ueq float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ueq <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ueq <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp ueq <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ueq double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ueq <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp ueq <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_ueq'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ueq float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ueq <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ueq <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp ueq <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ueq double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ueq <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp ueq <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp ueq float undef, undef
+  %V2F32 = fcmp ueq <2 x float> undef, undef
+  %V4F32 = fcmp ueq <4 x float> undef, undef
+  %V8F32 = fcmp ueq <8 x float> undef, undef
+
+  %F64 = fcmp ueq double undef, undef
+  %V2F64 = fcmp ueq <2 x double> undef, undef
+  %V4F64 = fcmp ueq <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_une(i32 %arg) {
+; LSX-LABEL: 'cmp_float_une'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp une float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp une <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp une <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp une <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp une double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp une <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp une <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_une'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp une float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp une <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp une <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp une <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp une double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp une <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp une <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp une float undef, undef
+  %V2F32 = fcmp une <2 x float> undef, undef
+  %V4F32 = fcmp une <4 x float> undef, undef
+  %V8F32 = fcmp une <8 x float> undef, undef
+
+  %F64 = fcmp une double undef, undef
+  %V2F64 = fcmp une <2 x double> undef, undef
+  %V4F64 = fcmp une <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_uno(i32 %arg) {
+; LSX-LABEL: 'cmp_float_uno'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp uno float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp uno <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp uno <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp uno <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp uno double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp uno <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp uno <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_uno'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp uno float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp uno <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp uno <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp uno <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp uno double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp uno <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp uno <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp uno float undef, undef
+  %V2F32 = fcmp uno <2 x float> undef, undef
+  %V4F32 = fcmp uno <4 x float> undef, undef
+  %V8F32 = fcmp uno <8 x float> undef, undef
+
+  %F64 = fcmp uno double undef, undef
+  %V2F64 = fcmp uno <2 x double> undef, undef
+  %V4F64 = fcmp uno <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_uge(i32 %arg) {
+; LSX-LABEL: 'cmp_float_uge'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp uge float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp uge <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp uge <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp uge <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp uge double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp uge <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp uge <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_uge'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp uge float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp uge <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp uge <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp uge <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp uge double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp uge <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp uge <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp uge float undef, undef
+  %V2F32 = fcmp uge <2 x float> undef, undef
+  %V4F32 = fcmp uge <4 x float> undef, undef
+  %V8F32 = fcmp uge <8 x float> undef, undef
+
+  %F64 = fcmp uge double undef, undef
+  %V2F64 = fcmp uge <2 x double> undef, undef
+  %V4F64 = fcmp uge <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_ugt(i32 %arg) {
+; LSX-LABEL: 'cmp_float_ugt'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ugt float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ugt <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ugt <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp ugt <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ugt double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ugt <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp ugt <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_ugt'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ugt float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ugt <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ugt <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp ugt <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ugt double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ugt <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp ugt <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp ugt float undef, undef
+  %V2F32 = fcmp ugt <2 x float> undef, undef
+  %V4F32 = fcmp ugt <4 x float> undef, undef
+  %V8F32 = fcmp ugt <8 x float> undef, undef
+
+  %F64 = fcmp ugt double undef, undef
+  %V2F64 = fcmp ugt <2 x double> undef, undef
+  %V4F64 = fcmp ugt <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_ule(i32 %arg) {
+; LSX-LABEL: 'cmp_float_ule'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ule float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ule <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ule <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp ule <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ule double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ule <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp ule <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_ule'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ule float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ule <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ule <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp ule <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ule double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ule <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp ule <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp ule float undef, undef
+  %V2F32 = fcmp ule <2 x float> undef, undef
+  %V4F32 = fcmp ule <4 x float> undef, undef
+  %V8F32 = fcmp ule <8 x float> undef, undef
+
+  %F64 = fcmp ule double undef, undef
+  %V2F64 = fcmp ule <2 x double> undef, undef
+  %V4F64 = fcmp ule <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_ult(i32 %arg) {
+; LSX-LABEL: 'cmp_float_ult'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ult float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ult <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ult <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp ult <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ult double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ult <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp ult <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_ult'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp ult float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp ult <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp ult <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp ult <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp ult double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp ult <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp ult <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp ult float undef, undef
+  %V2F32 = fcmp ult <2 x float> undef, undef
+  %V4F32 = fcmp ult <4 x float> undef, undef
+  %V8F32 = fcmp ult <8 x float> undef, undef
+
+  %F64 = fcmp ult double undef, undef
+  %V2F64 = fcmp ult <2 x double> undef, undef
+  %V4F64 = fcmp ult <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_false(i32 %arg) {
+; LSX-LABEL: 'cmp_float_false'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp false float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp false <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp false <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp false <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp false double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp false <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp false <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_false'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp false float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp false <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp false <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp false <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp false double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp false <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp false <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp false float undef, undef
+  %V2F32 = fcmp false <2 x float> undef, undef
+  %V4F32 = fcmp false <4 x float> undef, undef
+  %V8F32 = fcmp false <8 x float> undef, undef
+
+  %F64 = fcmp false double undef, undef
+  %V2F64 = fcmp false <2 x double> undef, undef
+  %V4F64 = fcmp false <4 x double> undef, undef
+
+  ret i32 undef
+}
+
+define i32 @cmp_float_true(i32 %arg) {
+; LSX-LABEL: 'cmp_float_true'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp true float undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp true <2 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp true <4 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V8F32 = fcmp true <8 x float> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp true double undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp true <2 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fcmp true <4 x double> undef, undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_float_true'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = fcmp true float undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2F32 = fcmp true <2 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4F32 = fcmp true <4 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8F32 = fcmp true <8 x float> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = fcmp true double undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fcmp true <2 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fcmp true <4 x double> undef, undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F32 = fcmp true float undef, undef
+  %V2F32 = fcmp true <2 x float> undef, undef
+  %V4F32 = fcmp true <4 x float> undef, undef
+  %V8F32 = fcmp true <8 x float> undef, undef
+
+  %F64 = fcmp true double undef, undef
+  %V2F64 = fcmp true <2 x double> undef, undef
+  %V4F64 = fcmp true <4 x double> undef, undef
+
+  ret i32 undef
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/icmp.ll b/llvm/test/Analysis/CostModel/LoongArch/icmp.ll
new file mode 100644
index 0000000000000..bb0879d25f68f
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/icmp.ll
@@ -0,0 +1,471 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx -disable-output 2>&1 | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx -disable-output 2>&1 | FileCheck %s --check-prefixes=LASX
+
+; Cost-model coverage for `icmp eq`: for each element width (i8, i16, i32,
+; i64) a scalar, a 128-bit vector and a 256-bit vector compare. Every compare
+; uses the same function argument for both operands.
+define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_eq'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp eq i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp eq <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp eq i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp eq <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp eq <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp eq i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp eq <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp eq <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp eq i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = icmp eq <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = icmp eq <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_eq'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp eq i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp eq <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp eq i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp eq <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp eq <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp eq i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp eq <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp eq <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp eq i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = icmp eq <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = icmp eq <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp eq i8 %arg8, %arg8
+  %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp eq <32 x i8> %argv32i8, %argv32i8
+
+  %I16 = icmp eq i16 %arg16, %arg16
+  %V8I16  = icmp eq <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp eq <16 x i16> %argv16i16, %argv16i16
+
+  %I32 = icmp eq i32 %arg32, %arg32
+  %V4I32 = icmp eq <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp eq <8 x i32> %argv8i32, %argv8i32
+
+  %I64 = icmp eq i64 %arg64, %arg64
+  %V2I64 = icmp eq <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp eq <4 x i64> %argv4i64, %argv4i64
+
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp ne`: for each element width (i8, i16, i32,
+; i64) a scalar, a 128-bit vector and a 256-bit vector compare. Every compare
+; uses the same function argument for both operands.
+define i32 @cmp_int_ne(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_ne'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ne i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp ne <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ne i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ne <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp ne <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ne i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ne <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp ne <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ne i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = icmp ne <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = icmp ne <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_ne'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ne i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp ne <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ne i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ne <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp ne <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ne i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ne <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp ne <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ne i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = icmp ne <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = icmp ne <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp ne i8 %arg8, %arg8
+  %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp ne <32 x i8> %argv32i8, %argv32i8
+
+  %I16 = icmp ne i16 %arg16, %arg16
+  %V8I16  = icmp ne <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp ne <16 x i16> %argv16i16, %argv16i16
+
+  %I32 = icmp ne i32 %arg32, %arg32
+  %V4I32 = icmp ne <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp ne <8 x i32> %argv8i32, %argv8i32
+
+  %I64 = icmp ne i64 %arg64, %arg64
+  %V2I64 = icmp ne <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp ne <4 x i64> %argv4i64, %argv4i64
+
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp sge` (signed greater-or-equal): for each
+; element width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit
+; vector compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_sge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_sge'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp sge i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp sge <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp sge i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp sge <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp sge <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp sge i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp sge <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp sge <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp sge i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp sge <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp sge <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_sge'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp sge i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp sge <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp sge i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp sge <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp sge <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp sge i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp sge <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp sge <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp sge i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp sge <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp sge <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp sge i8 %arg8, %arg8
+  %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp sge <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp sge i16 %arg16, %arg16
+  %V8I16  = icmp sge <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp sge <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp sge i32 %arg32, %arg32
+  %V4I32 = icmp sge <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp sge <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp sge i64 %arg64, %arg64
+  %V2I64 = icmp sge <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp sge <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp uge` (unsigned greater-or-equal): for each
+; element width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit
+; vector compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_uge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_uge'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp uge i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp uge i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp uge i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp uge i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_uge'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp uge i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp uge i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp uge i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp uge i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp uge i8 %arg8, %arg8
+  %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp uge i16 %arg16, %arg16
+  %V8I16  = icmp uge <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp uge i32 %arg32, %arg32
+  %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp uge i64 %arg64, %arg64
+  %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp sgt` (signed greater-than): for each element
+; width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit vector
+; compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_sgt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_sgt'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp sgt i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp sgt <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp sgt i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp sgt <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp sgt <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp sgt i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp sgt <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp sgt <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp sgt i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp sgt <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp sgt <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_sgt'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp sgt i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp sgt <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp sgt i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp sgt <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp sgt <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp sgt i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp sgt <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp sgt <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp sgt i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp sgt <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp sgt <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp sgt i8 %arg8, %arg8
+  %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp sgt <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp sgt i16 %arg16, %arg16
+  %V8I16  = icmp sgt <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp sgt <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp sgt i32 %arg32, %arg32
+  %V4I32 = icmp sgt <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp sgt <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp sgt i64 %arg64, %arg64
+  %V2I64 = icmp sgt <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp sgt <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp ugt` (unsigned greater-than): for each element
+; width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit vector
+; compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_ugt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_ugt'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ugt i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp ugt <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ugt i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ugt <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp ugt <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ugt i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ugt <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp ugt <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ugt i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp ugt <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp ugt <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_ugt'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ugt i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp ugt <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ugt i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ugt <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp ugt <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ugt i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ugt <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp ugt <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ugt i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp ugt <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp ugt <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp ugt i8 %arg8, %arg8
+  %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp ugt <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp ugt i16 %arg16, %arg16
+  %V8I16  = icmp ugt <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp ugt <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp ugt i32 %arg32, %arg32
+  %V4I32 = icmp ugt <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp ugt <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp ugt i64 %arg64, %arg64
+  %V2I64 = icmp ugt <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp ugt <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp sle` (signed less-or-equal): for each element
+; width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit vector
+; compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_sle(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_sle'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp sle i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp sle <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp sle i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp sle <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp sle <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp sle i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp sle <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp sle <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp sle i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp sle <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp sle <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_sle'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp sle i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp sle <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp sle i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp sle <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp sle <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp sle i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp sle <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp sle <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp sle i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp sle <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp sle <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp sle i8 %arg8, %arg8
+  %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp sle <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp sle i16 %arg16, %arg16
+  %V8I16  = icmp sle <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp sle <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp sle i32 %arg32, %arg32
+  %V4I32 = icmp sle <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp sle <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp sle i64 %arg64, %arg64
+  %V2I64 = icmp sle <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp sle <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp ule` (unsigned less-or-equal): for each element
+; width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit vector
+; compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_ule(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_ule'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ule i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ule i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ule i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ule i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_ule'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ule i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ule i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ule i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ule i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp ule i8 %arg8, %arg8
+  %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp ule i16 %arg16, %arg16
+  %V8I16  = icmp ule <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp ule i32 %arg32, %arg32
+  %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp ule i64 %arg64, %arg64
+  %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp slt` (signed less-than): for each element
+; width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit vector
+; compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_slt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_slt'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp slt i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp slt <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp slt i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp slt <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp slt <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp slt i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp slt <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp slt <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp slt i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp slt <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp slt <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_slt'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp slt i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp slt <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp slt i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp slt <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp slt <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp slt i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp slt <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp slt <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp slt i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp slt <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp slt <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp slt i8 %arg8, %arg8
+  %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp slt <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp slt i16 %arg16, %arg16
+  %V8I16  = icmp slt <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp slt <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp slt i32 %arg32, %arg32
+  %V4I32 = icmp slt <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp slt <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp slt i64 %arg64, %arg64
+  %V2I64 = icmp slt <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp slt <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
+
+; Cost-model coverage for `icmp ult` (unsigned less-than): for each element
+; width (i8, i16, i32, i64) a scalar, a 128-bit vector and a 256-bit vector
+; compare. Every compare uses the same argument for both operands.
+define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64) {
+; LSX-LABEL: 'cmp_int_ult'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ult i8 %arg8, %arg8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = icmp ult <32 x i8> %argv32i8, %argv32i8
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ult i16 %arg16, %arg16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ult <8 x i16> %argv8i16, %argv8i16
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = icmp ult <16 x i16> %argv16i16, %argv16i16
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ult i32 %arg32, %arg32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ult <4 x i32> %argv4i32, %argv4i32
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = icmp ult <8 x i32> %argv8i32, %argv8i32
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ult i64 %arg64, %arg64
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp ult <2 x i64> %argv2i64, %argv2i64
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = icmp ult <4 x i64> %argv4i64, %argv4i64
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'cmp_int_ult'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = icmp ult i8 %arg8, %arg8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = icmp ult <32 x i8> %argv32i8, %argv32i8
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = icmp ult i16 %arg16, %arg16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = icmp ult <8 x i16> %argv8i16, %argv8i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = icmp ult <16 x i16> %argv16i16, %argv16i16
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = icmp ult i32 %arg32, %arg32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = icmp ult <4 x i32> %argv4i32, %argv4i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = icmp ult <8 x i32> %argv8i32, %argv8i32
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = icmp ult i64 %arg64, %arg64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = icmp ult <2 x i64> %argv2i64, %argv2i64
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = icmp ult <4 x i64> %argv4i64, %argv4i64
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I8 = icmp ult i8 %arg8, %arg8
+  %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
+  %V32I8 = icmp ult <32 x i8> %argv32i8, %argv32i8
+  %I16 = icmp ult i16 %arg16, %arg16
+  %V8I16  = icmp ult <8 x i16> %argv8i16, %argv8i16
+  %V16I16 = icmp ult <16 x i16> %argv16i16, %argv16i16
+  %I32 = icmp ult i32 %arg32, %arg32
+  %V4I32 = icmp ult <4 x i32> %argv4i32, %argv4i32
+  %V8I32 = icmp ult <8 x i32> %argv8i32, %argv8i32
+  %I64 = icmp ult i64 %arg64, %arg64
+  %V2I64 = icmp ult <2 x i64> %argv2i64, %argv2i64
+  %V4I64 = icmp ult <4 x i64> %argv4i64, %argv4i64
+  ret i32 undef
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/select.ll b/llvm/test/Analysis/CostModel/LoongArch/select.ll
new file mode 100644
index 0000000000000..9fbe417ee9bd4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/select.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define i32 @test_select() {
+; LSX-LABEL: 'test_select'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = select i1 undef, i64 undef, i64 undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:2 for: %V4I64 = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = select i1 undef, i32 undef, i32 undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I32 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8I32 = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = select i1 undef, i16 undef, i16 undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I16 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I8 = select i1 undef, i8 undef, i8 undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I8 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'test_select'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = select i1 undef, i64 undef, i64 undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = select i1 undef, i32 undef, i32 undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I32 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I32 = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = select i1 undef, i16 undef, i16 undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I16 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I8 = select i1 undef, i8 undef, i8 undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I8 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %I64 = select i1 undef, i64 undef, i64 undef
+  %V2I64 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+  %V4I64 = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+
+  %I32 = select i1 undef, i32 undef, i32 undef
+  %V4I32 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+  %V8I32 = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+
+  %I16 = select i1 undef, i16 undef, i16 undef
+  %V8I16 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+  %V16I16 = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+
+  %I8 = select i1 undef, i8 undef, i8 undef
+  %V16I8 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+  %V32I8 = select <32 x i1> undef, <32 x i8> undef, <32 x i8> undef
+
+  ret i32 undef
+}
+
+define i32 @test_select_fp() {
+; LSX-LABEL: 'test_select_fp'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F64 = select i1 undef, double undef, double undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V2F64 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:2 for: %V4F64 = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %F32 = select i1 undef, float undef, float undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4F32 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8F32 = select <8 x i1> undef, <8 x float> undef, <8 x float> undef
+; LSX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; LASX-LABEL: 'test_select_fp'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F64 = select i1 undef, double undef, double undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V2F64 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4F64 = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %F32 = select i1 undef, float undef, float undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4F32 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8F32 = select <8 x i1> undef, <8 x float> undef, <8 x float> undef
+; LASX-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+  %F64 = select i1 undef, double undef, double undef
+  %V2F64 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+  %V4F64 = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
+
+  %F32 = select i1 undef, float undef, float undef
+  %V4F32 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+  %V8F32 = select <8 x i1> undef, <8 x float> undef, <8 x float> undef
+
+  ret i32 undef
+}

>From 787611d4e8c3e3979e21ba81ba332d07b696615f Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Thu, 27 Nov 2025 09:30:09 +0800
Subject: [PATCH 6/8] Support getCFInstrCost

---
 .../LoongArch/LoongArchTargetTransformInfo.cpp     | 14 ++++++++++++++
 .../LoongArch/LoongArchTargetTransformInfo.h       |  3 +++
 2 files changed, 17 insertions(+)

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index ee76750b9c292..de29b6f943030 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -626,6 +626,20 @@ InstructionCost LoongArchTTIImpl::getCmpSelInstrCost(
                                    Op1Info, Op2Info, I);
 }
 
+InstructionCost LoongArchTTIImpl::getCFInstrCost(unsigned Opcode,
+                                                 TTI::TargetCostKind CostKind,
+                                                 const Instruction *I) const {
+  // PHI nodes are costed as free for every cost kind.
+  if (Opcode == Instruction::PHI)
+    return 0;
+
+  // Every other opcode costs 4 for reciprocal throughput and 1 for the
+  // remaining cost kinds. Branches are assumed to be predicted.
+  if (CostKind == TTI::TCK_RecipThroughput)
+    return 4;
+  return 1;
+}
+
 bool LoongArchTTIImpl::prefersVectorizedAddressing() const { return false; }
 
 // TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index fe8a50e878f52..2610c2c7c1228 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -79,6 +79,9 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
       const Instruction *I = nullptr) const override;
 
+  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                                 const Instruction *I = nullptr) const override;
+
   bool prefersVectorizedAddressing() const override;
 
   TTI::MemCmpExpansionOptions

>From 56168f81c00afb0728842592b2b62c314d818b7e Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Thu, 11 Dec 2025 19:14:14 +0800
Subject: [PATCH 7/8] Support getShuffleCost

---
 .../LoongArchTargetTransformInfo.cpp          | 244 +++++++++++++
 .../LoongArch/LoongArchTargetTransformInfo.h  |   6 +
 .../CostModel/LoongArch/shuffle-broadcast.ll  | 120 ++++++
 .../LoongArch/shuffle-extract-subvetor.ll     | 323 +++++++++++++++++
 .../LoongArch/shuffle-insert-subvector.ll     | 343 ++++++++++++++++++
 .../CostModel/LoongArch/shuffle-reverse.ll    | 120 ++++++
 .../CostModel/LoongArch/shuffle-select.ll     | 104 ++++++
 .../CostModel/LoongArch/shuffle-single-src.ll | 137 +++++++
 .../CostModel/LoongArch/shuffle-splice.ll     | 122 +++++++
 .../CostModel/LoongArch/shuffle-transpose.ll  | 120 ++++++
 .../CostModel/LoongArch/shuffle-two-src.ll    | 120 ++++++
 11 files changed, 1759 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-broadcast.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-extract-subvetor.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-insert-subvector.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-reverse.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-select.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-single-src.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-splice.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-transpose.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/shuffle-two-src.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index de29b6f943030..b8ddca89434c0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -17,7 +17,9 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
 #include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/InstructionCost.h"
 #include <optional>
 
@@ -640,6 +642,248 @@ InstructionCost LoongArchTTIImpl::getCFInstrCost(unsigned Opcode,
   return 1;
 }
 
+InstructionCost LoongArchTTIImpl::getShuffleCost(
+    TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
+    ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
+    VectorType *SubTp, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) const {
+  // Costs a shuffle via the LSX/LASX tables below, else defers to BaseT.
+  // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+  // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcTy);
+
+  Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp);
+
+  if (Kind == TTI::SK_Broadcast) {
+    // For Broadcasts we are splatting the first element from the first input
+    // register, so only need to reference that input and all the output
+    // registers are the same.
+    LT.first = 1;
+
+    // If we're broadcasting a one-use load, [X]VLDREPL can do this for free.
+    using namespace PatternMatch;
+    if (!Args.empty() && match(Args[0], m_OneUse(m_Load(m_Value()))) &&
+        (ST->hasExtLSX() || ST->hasExtLASX()))
+      return TTI::TCC_Free;
+  }
+
+  // Attempt to detect a cheaper in-lane shuffle, avoiding 128-bit subvector
+  // permutation.
+  bool IsInLaneShuffle = false;
+  if (SrcTy->getPrimitiveSizeInBits() > 0 &&
+      (SrcTy->getPrimitiveSizeInBits() % 128) == 0 &&
+      SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
+      Mask.size() == SrcTy->getElementCount().getKnownMinValue()) {
+    unsigned NumLanes = SrcTy->getPrimitiveSizeInBits() / 128;
+    unsigned NumEltsPerLane = Mask.size() / NumLanes;
+    if ((Mask.size() % NumLanes) == 0) {
+      IsInLaneShuffle = all_of(enumerate(Mask), [&](const auto &P) {
+        return P.value() == PoisonMaskElem ||
+               ((P.value() % Mask.size()) / NumEltsPerLane) ==
+                   (P.index() / NumEltsPerLane);
+      });
+    }
+  }
+
+  // Subvector extractions are free if they start at the beginning of a
+  // vector and cheap if the subvectors are aligned.
+  if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) {
+    int NumElts = LT.second.getVectorNumElements();
+    if ((Index % NumElts) == 0)
+      return TTI::TCC_Free;
+    std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
+    if (SubLT.second.isVector()) {
+      int NumSubElts = SubLT.second.getVectorNumElements();
+      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
+        return SubLT.first;
+    }
+    // If the extract subvector is not optimal, treat it as single op shuffle.
+    Kind = TTI::SK_PermuteSingleSrc;
+  }
+
+  // Subvector insertions are cheap if the subvectors are aligned.
+  // Note that in general, the insertion starting at the beginning of a vector
+  // isn't free, because we need to preserve the rest of the wide vector,
+  // but if the destination vector legalizes to the same width as the subvector
+  // then the insertion will simplify to a (free) register copy.
+  if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
+    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
+    int NumElts = DstLT.second.getVectorNumElements();
+    std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
+    if (SubLT.second.isVector()) {
+      int NumSubElts = SubLT.second.getVectorNumElements();
+      bool MatchingTypes =
+          NumElts == NumSubElts &&
+          (SubTp->getElementCount().getKnownMinValue() % NumSubElts) == 0;
+      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
+        return MatchingTypes ? TTI::TCC_Free : SubLT.first;
+    }
+
+    // Attempt to match vextrins/xvinsve0 pattern.
+    if (LT.first == 1 && SubLT.first == 1) {
+      // vextrins.{w/d}
+      if (ST->hasExtLSX() &&
+          ((LT.second == MVT::v4f32 && SubLT.second == MVT::f32) ||
+           (LT.second == MVT::v2f64 && SubLT.second == MVT::f64)))
+        return 1;
+
+      // xvinsve0.{w/d}
+      if (ST->hasExtLASX() &&
+          ((LT.second == MVT::v8f32 && SubLT.second == MVT::f32) ||
+           (LT.second == MVT::v4f64 && SubLT.second == MVT::f64)))
+        return 1;
+    }
+
+    // If the insertion is the lowest subvector then it will be blended
+    // otherwise treat it like a 2-op shuffle.
+    Kind =
+        (Index == 0 && LT.first == 1) ? TTI::SK_Select : TTI::SK_PermuteTwoSrc;
+  }
+  // Costs below are {Latency, RecipThroughput} (see CostKindCosts).
+  static const CostKindTblEntry LSXCostTable[] = {
+      {TTI::SK_Broadcast, MVT::v16i8, {1, 1}}, // vreplvei.b
+      {TTI::SK_Broadcast, MVT::v8i16, {1, 1}}, // vreplvei.h
+      {TTI::SK_Broadcast, MVT::v4i32, {1, 1}}, // vreplvei.w
+      {TTI::SK_Broadcast, MVT::v2i64, {1, 1}}, // vreplvei.d
+      {TTI::SK_Broadcast, MVT::v4f32, {1, 1}}, // vreplvei.w
+      {TTI::SK_Broadcast, MVT::v2f64, {1, 1}}, // vreplvei.d
+
+      {TTI::SK_Reverse, MVT::v16i8, {2, 2}}, // vshuf4i.w + vshuf4i.b
+      {TTI::SK_Reverse, MVT::v8i16, {2, 2}}, // vshuf4i.d + vshuf4i.h
+      {TTI::SK_Reverse, MVT::v4i32, {1, 1}}, // vshuf4i.w
+      {TTI::SK_Reverse, MVT::v2i64, {1, 1}}, // vshuf4i.d
+      {TTI::SK_Reverse, MVT::v4f32, {1, 1}}, // vshuf4i.w
+      {TTI::SK_Reverse, MVT::v2f64, {1, 1}}, // vshuf4i.d
+
+      {TTI::SK_Select, MVT::v16i8, {1, 2}}, // vbitsel.v
+      {TTI::SK_Select, MVT::v8i16, {1, 2}}, // vbitsel.v
+      {TTI::SK_Select, MVT::v4i32, {1, 2}}, // vbitsel.v
+      {TTI::SK_Select, MVT::v2i64, {1, 1}}, // vshuf4i.d
+      {TTI::SK_Select, MVT::v4f32, {1, 2}}, // vbitsel.v
+      {TTI::SK_Select, MVT::v2f64, {1, 1}}, // vshuf4i.d
+
+      {TTI::SK_Splice, MVT::v16i8, {3, 3}}, // vbsrl.v + vbsll.v + vor.v
+      {TTI::SK_Splice, MVT::v8i16, {3, 3}}, // vbsrl.v + vbsll.v + vor.v
+      {TTI::SK_Splice, MVT::v4i32, {3, 3}}, // vbsrl.v/vbsll.v + vor.v
+      {TTI::SK_Splice, MVT::v2i64, {1, 1}}, // vshuf4i.d
+      {TTI::SK_Splice, MVT::v4f32, {2, 2}}, // vbsrl.v/vbsll.v + vor.v
+      {TTI::SK_Splice, MVT::v2f64, {1, 1}}, // vshuf4i.d
+
+      {TTI::SK_Transpose, MVT::v16i8, {1, 1}}, // vpackev.b
+      {TTI::SK_Transpose, MVT::v8i16, {1, 1}}, // vpackev.h
+      {TTI::SK_Transpose, MVT::v4i32, {1, 1}}, // vpackev.w
+      {TTI::SK_Transpose, MVT::v2i64, {1, 1}}, // vpackev.d
+      {TTI::SK_Transpose, MVT::v4f32, {1, 1}}, // vpackev.w
+      {TTI::SK_Transpose, MVT::v2f64, {1, 1}}, // vpackev.d
+
+      {TTI::SK_PermuteSingleSrc, MVT::v16i8, {1, 2}}, // vshuf.b
+      {TTI::SK_PermuteSingleSrc, MVT::v8i16, {1, 2}}, // vshuf.h
+      {TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1}}, // vshuf4i.w
+      {TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1}}, // vshuf4i.d
+      {TTI::SK_PermuteSingleSrc, MVT::v4f32, {1, 1}}, // vshuf4i.w
+      {TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1}}, // vshuf4i.d
+
+      {TTI::SK_PermuteTwoSrc, MVT::v16i8, {1, 2}}, // vshuf.b
+      {TTI::SK_PermuteTwoSrc, MVT::v8i16, {1, 2}}, // vshuf.h
+      {TTI::SK_PermuteTwoSrc, MVT::v4i32, {1, 2}}, // vshuf.w
+      {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1}}, // vshuf4i.d
+      {TTI::SK_PermuteTwoSrc, MVT::v4f32, {1, 2}}, // vshuf.w
+      {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1}}, // vshuf4i.d
+  };
+
+  if (ST->hasExtLSX()) {
+    if (const auto *Entry = CostTableLookup(LSXCostTable, Kind, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+  }
+
+  static const CostKindTblEntry LASXInLaneCostTable[] = {
+      {TTI::SK_PermuteSingleSrc, MVT::v32i8, {1, 2}},  // xvshuf.b
+      {TTI::SK_PermuteSingleSrc, MVT::v16i16, {1, 2}}, // xvshuf.h
+
+      {TTI::SK_PermuteTwoSrc, MVT::v32i8, {1, 2}},  // xvshuf.b
+      {TTI::SK_PermuteTwoSrc, MVT::v16i16, {1, 2}}, // xvshuf.h
+      {TTI::SK_PermuteTwoSrc, MVT::v8i32, {1, 2}},  // xvshuf.w
+      {TTI::SK_PermuteTwoSrc, MVT::v4i64, {1, 2}},  // xvshuf.d
+      {TTI::SK_PermuteTwoSrc, MVT::v8f32, {1, 2}},  // xvshuf.w
+      {TTI::SK_PermuteTwoSrc, MVT::v4f64, {1, 2}},  // xvshuf.d
+  };
+
+  if (ST->hasExtLASX() && IsInLaneShuffle) {
+    if (const auto *Entry =
+            CostTableLookup(LASXInLaneCostTable, Kind, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+  }
+
+  static const CostKindTblEntry LASXCostTable[] = {
+      {TTI::SK_Broadcast, MVT::v32i8, {4, 2}},  // xvpermi.d + xvrepl128vei.b
+      {TTI::SK_Broadcast, MVT::v16i16, {4, 2}}, // xvpermi.d + xvrepl128vei.h
+      {TTI::SK_Broadcast, MVT::v8i32, {4, 2}},  // xvpermi.d + xvrepl128vei.w
+      {TTI::SK_Broadcast, MVT::v4i64, {3, 1}},  // xvpermi.d
+      {TTI::SK_Broadcast, MVT::v8f32, {4, 2}},  // xvpermi.d + xvrepl128vei.w
+      {TTI::SK_Broadcast, MVT::v4f64, {3, 1}},  // xvpermi.d
+
+      {TTI::SK_Reverse, MVT::v32i8, {5, 3}},  // xvpermi.d + xvshuf4i.w
+                                              // + xvshuf4i.b
+      {TTI::SK_Reverse, MVT::v16i16, {4, 2}}, // xvpermi.d + xvshuf4i.h
+      {TTI::SK_Reverse, MVT::v8i32, {4, 2}},  // xvpermi.d + xvshuf4i.w
+      {TTI::SK_Reverse, MVT::v4i64, {3, 1}},  // xvpermi.d
+      {TTI::SK_Reverse, MVT::v8f32, {4, 2}},  // xvpermi.d + xvshuf4i.w
+      {TTI::SK_Reverse, MVT::v4f64, {3, 1}},  // xvpermi.d
+
+      {TTI::SK_Select, MVT::v32i8, {1, 2}},  // xvbitsel.v
+      {TTI::SK_Select, MVT::v16i16, {1, 2}}, // xvbitsel.v
+      {TTI::SK_Select, MVT::v8i32, {1, 2}},  // xvbitsel.v
+      {TTI::SK_Select, MVT::v4i64, {1, 2}},  // xvbitsel.v
+      {TTI::SK_Select, MVT::v8f32, {1, 2}},  // xvbitsel.v
+      {TTI::SK_Select, MVT::v4f64, {1, 2}},  // xvbitsel.v
+
+      {TTI::SK_Splice, MVT::v32i8, {6, 4}},  // xvpermi.q + xvbsll.v + xvbsrl.v
+                                             // + xvor.v
+      {TTI::SK_Splice, MVT::v16i16, {6, 4}}, // xvpermi.q + xvbsll.v + xvbsrl.v
+                                             // + xvor.v
+      {TTI::SK_Splice, MVT::v8i32, {6, 4}},  // xvpermi.q + xvbsll.v + xvbsrl.v
+                                             // + xvor.v
+      {TTI::SK_Splice, MVT::v4i64, {4, 2}},  // xvpermi.q + xvshuf4i.d
+      {TTI::SK_Splice, MVT::v8f32, {6, 4}},  // xvpermi.q + xvbsll.v + xvbsrl.v
+                                             // + xvor.v
+      {TTI::SK_Splice, MVT::v4f64, {4, 2}},  // xvpermi.q + xvshuf4i.d
+
+      {TTI::SK_Transpose, MVT::v32i8, {1, 1}},  // xvpackev.b
+      {TTI::SK_Transpose, MVT::v16i16, {1, 1}}, // xvpackev.h
+      {TTI::SK_Transpose, MVT::v8i32, {1, 1}},  // xvpackev.w
+      {TTI::SK_Transpose, MVT::v4i64, {1, 1}},  // xvpackev.d
+      {TTI::SK_Transpose, MVT::v8f32, {1, 1}},  // xvpackev.w
+      {TTI::SK_Transpose, MVT::v4f64, {1, 1}},  // xvpackev.d
+
+      {TTI::SK_PermuteSingleSrc, MVT::v32i8, {4, 3}},  // xvpermi.d + xvshuf.b
+      {TTI::SK_PermuteSingleSrc, MVT::v16i16, {4, 3}}, // xvpermi.d + xvshuf.h
+      {TTI::SK_PermuteSingleSrc, MVT::v8i32, {3, 1}},  // xvperm.w
+      {TTI::SK_PermuteSingleSrc, MVT::v4i64, {3, 1}},  // xvpermi.d
+      {TTI::SK_PermuteSingleSrc, MVT::v8f32, {3, 1}},  // xvperm.w
+      {TTI::SK_PermuteSingleSrc, MVT::v4f64, {3, 1}},  // xvpermi.d
+
+      {TTI::SK_PermuteTwoSrc, MVT::v32i8, {9, 8}},  // 2*xvpermi.q + 2*xvshuf.b
+                                                    // + xvbitsel.v
+      {TTI::SK_PermuteTwoSrc, MVT::v16i16, {9, 8}}, // 2*xvpermi.q + 2*xvshuf.h
+                                                    // + xvbitsel.v
+      {TTI::SK_PermuteTwoSrc, MVT::v8i32, {7, 4}},  // 2*xvperm.w + xvbitsel.v
+      {TTI::SK_PermuteTwoSrc, MVT::v4i64, {7, 4}},  // 2*xvpermi.d + xvshuf.d
+      {TTI::SK_PermuteTwoSrc, MVT::v8f32, {7, 4}},  // 2*xvperm.w + xvbitsel.v
+      {TTI::SK_PermuteTwoSrc, MVT::v4f64, {7, 4}},  // 2*xvpermi.d + xvshuf.d
+  };
+
+  if (ST->hasExtLASX()) {
+    if (const auto *Entry = CostTableLookup(LASXCostTable, Kind, LT.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * *KindCost;
+  }
+
+  return BaseT::getShuffleCost(Kind, DstTy, SrcTy, Mask, CostKind, Index,
+                               SubTp);
+}
+
 bool LoongArchTTIImpl::prefersVectorizedAddressing() const { return false; }
 
 // TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 2610c2c7c1228..19604e9f772e4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -82,6 +82,12 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) const override;
 
+  InstructionCost
+  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
+                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
+                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
+                 const Instruction *CxtI = nullptr) const override;
+
   bool prefersVectorizedAddressing() const override;
 
   TTI::MemCmpExpansionOptions
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-broadcast.ll
new file mode 100644
index 0000000000000..ffb8978262117
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-broadcast.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:3 SizeLat:5 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
+  %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:9 Lat:3 SizeLat:9 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
+  %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256) {
+; LSX-LABEL: 'test_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:9 Lat:1 SizeLat:9 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:9 Lat:4 SizeLat:9 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
+  %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
+  %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:9 Lat:1 SizeLat:9 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:17 Lat:4 SizeLat:17 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
+  %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
+  %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:9 Lat:1 SizeLat:9 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:17 Lat:1 SizeLat:17 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:9 Lat:1 SizeLat:9 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:33 Lat:4 SizeLat:33 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V32  = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+  %V64  = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+  %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+  %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:9 Lat:1 SizeLat:9 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:17 Lat:1 SizeLat:17 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:33 Lat:1 SizeLat:33 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:5 Lat:1 SizeLat:5 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:9 Lat:1 SizeLat:9 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:17 Lat:1 SizeLat:17 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:65 Lat:4 SizeLat:65 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V16  = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+  %V32  = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+  %V64  = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+  %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+  %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-extract-subvetor.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-extract-subvetor.ll
new file mode 100644
index 0000000000000..93ef74bdd26b0
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-extract-subvetor.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output < %s | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output < %s | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<4 x double> %src256) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  ret void
+}
+
+define void @test_vXi64(<4 x i64> %src256) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  ret void
+}
+
+define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:3 SizeLat:4 for: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:6 Lat:3 SizeLat:6 for: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 4, i32 5>
+  %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> <i32 6, i32 7>
+  %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret void
+}
+
+define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:4 SizeLat:8 for: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:4 SizeLat:8 for: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:10 Lat:4 SizeLat:10 for: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:12 Lat:4 SizeLat:12 for: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+  %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
+  %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+  %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 2, i32 3>
+  %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 4, i32 5>
+  %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> <i32 6, i32 7>
+  %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 0, i32 1>
+  %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 2, i32 3>
+  %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 4, i32 5>
+  %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
+  %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
+  %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
+  %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
+  %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
+  %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+  %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret void
+}
+
+define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 0, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 12, i32 13>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 16, i32 17>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:2 SizeLat:16 for: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:2 SizeLat:16 for: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 0, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 12, i32 13>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 16, i32 17>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:6 Lat:4 SizeLat:6 for: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:4 SizeLat:8 for: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:4 SizeLat:8 for: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:4 SizeLat:8 for: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:4 SizeLat:8 for: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:4 SizeLat:8 for: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:12 Lat:4 SizeLat:12 for: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:12 Lat:4 SizeLat:12 for: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:12 Lat:4 SizeLat:12 for: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:16 Lat:4 SizeLat:16 for: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:24 Lat:4 SizeLat:24 for: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 2, i32 3>
+  %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 4, i32 5>
+  %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> <i32 6, i32 7>
+  %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 2, i32 3>
+  %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 4, i32 5>
+  %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 6, i32 7>
+  %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 8, i32 9>
+  %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 10, i32 11>
+  %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
+  %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
+  %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+  %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 2, i32 3>
+  %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 4, i32 5>
+  %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 6, i32 7>
+  %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 8, i32 9>
+  %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 10, i32 11>
+  %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 12, i32 13>
+  %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 14, i32 15>
+  %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 16, i32 17>
+  %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>
+  %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>
+  %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>
+  %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>
+  %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>
+  %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
+  %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
+  %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+  %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+  %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
+  %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
+  %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
+  %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-insert-subvector.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-insert-subvector.ll
new file mode 100644
index 0000000000000..be116943a3d6a
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-insert-subvector.ll
@@ -0,0 +1,343 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  ; Insert-subvector cost test: %V256_01 / %V256_23 replace lanes 0-1 / 2-3 of %src256 with the two %src128 lanes widened above.
+  %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  ; Insert-subvector cost test: %V256_01 / %V256_23 replace lanes 0-1 / 2-3 of %src256 with the two %src128 lanes widened above.
+  %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256) {
+; LSX-LABEL: 'test_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:7 SizeLat:4 for: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %src64_128  = shufflevector <2 x float> %src64,  <2 x float> undef,  <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %src64_256  = shufflevector <2 x float> %src64,  <2 x float> undef,  <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef,  <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ; Insert-subvector cost test. %V128_01 / %V128_23 replace lanes 0-1 / 2-3 of %src128 with the two %src64 lanes widened above.
+  %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ; %V256_01..%V256_67 replace one lane pair of %src256 with the two %src64 lanes; %V256_0123 / %V256_4567 replace lanes 0-3 / 4-7 with the four %src128 lanes.
+  %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+  %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+  %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:8 for: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %src64_128  = shufflevector <2 x i32> %src64,  <2 x i32> undef,  <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %src64_256  = shufflevector <2 x i32> %src64,  <2 x i32> undef,  <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef,  <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ; Insert-subvector cost test. %V128_01 / %V128_23 replace lanes 0-1 / 2-3 of %src128 with the two %src64 lanes widened above.
+  %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ; %V256_01..%V256_67 replace one lane pair of %src256 with the two %src64 lanes; %V256_0123 / %V256_4567 replace lanes 0-3 / 4-7 with the four %src128 lanes.
+  %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+  %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+  %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:2 SizeLat:8 for: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:8 Lat:9 SizeLat:8 for: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:8 Lat:9 SizeLat:8 for: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:8 Lat:9 SizeLat:8 for: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:9 SizeLat:16 for: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %src32_64   = shufflevector <2 x i16> %src32,   <2 x i16> undef,   <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %src32_128  = shufflevector <2 x i16> %src32,   <2 x i16> undef,   <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src32_256  = shufflevector <2 x i16> %src32,   <2 x i16> undef,  <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src64_128  = shufflevector <4 x i16> %src64,   <4 x i16> undef,   <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src64_256  = shufflevector <4 x i16> %src64,   <4 x i16> undef,  <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src128_256 = shufflevector <8 x i16> %src128,  <8 x i16> undef,  <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+
+  %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+
+  %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+  %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+  %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+  %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+  %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+  %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+  %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+  %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %src16_32   = shufflevector <2 x i8> %src16,   <2 x i8> undef,   <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %src16_64   = shufflevector <2 x i8> %src16,   <2 x i8> undef,   <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src16_128  = shufflevector <2 x i8> %src16,   <2 x i8> undef,  <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src16_256  = shufflevector <2 x i8> %src16,   <2 x i8> undef,  <32 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src32_64   = shufflevector <4 x i8> %src32,   <4 x i8> undef,   <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src32_128  = shufflevector <4 x i8> %src32,   <4 x i8> undef,  <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src32_256  = shufflevector <4 x i8> %src32,   <4 x i8> undef,  <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src64_128  = shufflevector <8 x i8> %src64,   <8 x i8> undef,  <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src64_256  = shufflevector <8 x i8> %src64,   <8 x i8> undef,  <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+
+  %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+
+  %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+  %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+  %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
+  %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
+  %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+  %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+  %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+  %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-reverse.ll
new file mode 100644
index 0000000000000..63e790efad866
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-reverse.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:3 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Full lane reversal of <2 x double> and <4 x double>; the second operand is undef, so each shuffle reads a single source.
+  %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:14 Lat:3 SizeLat:14 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Full lane reversal of <2 x i64> and <4 x i64>; the second operand is undef, so each shuffle reads a single source.
+  %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256) {
+; LSX-LABEL: 'test_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:4 SizeLat:16 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Full lane reversal of <2 x float>, <4 x float> and <8 x float> (64-, 128- and 256-bit); the second operand is undef in every shuffle.
+  %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:28 Lat:4 SizeLat:28 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Full lane reversal of <2 x i32>, <4 x i32> and <8 x i32> (64-, 128- and 256-bit); the second operand is undef in every shuffle.
+  %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+  %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:32 Lat:4 SizeLat:32 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:56 Lat:4 SizeLat:56 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Full lane reversal of i16 vectors from <2 x i16> (32-bit) up to <16 x i16> (256-bit); the second operand is undef in every shuffle.
+  %V32  = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %V64  = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:2 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:64 Lat:4 SizeLat:64 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:2 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:112 Lat:5 SizeLat:112 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Full lane reversal of i8 vectors from <2 x i8> (16-bit) up to <32 x i8> (256-bit); the second operand is undef in every shuffle.
+  %V16  = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+  %V32  = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %V64  = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-select.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-select.ll
new file mode 100644
index 0000000000000..4d650207a02d2
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-select.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <2 x double> %src128_1, <4 x double> %src256_1) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 1, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 1, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Two-source shuffles. NOTE(review): mask <1, 3> takes lane 1 of both inputs, unlike the lane-for-lane blend <0, 5, 2, 3> below - confirm it is intended.
+  %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 1, i32 3>
+  %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <2 x i64> %src128_1, <4 x i64> %src256_1) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 2, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 2, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:14 Lat:1 SizeLat:14 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Lane-wise blends of two i64 vectors: result lane i is lane i of the first or of the second operand (mask index i or i + N).
+  %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 2, i32 1>
+  %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 2, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: store <8 x i32> %V256, ptr undef, align 32
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 2, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:28 Lat:1 SizeLat:28 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: store <8 x i32> %V256, ptr undef, align 32
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Lane-wise blends of two i32 vectors; the 256-bit result is also stored through an opaque ptr, so the vector store is costed as well.
+  %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 2, i32 1>
+  %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+  store <8 x i32> %V256, ptr undef
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <2 x i16> %src32_1, <4 x i16> %src64_1, <8 x i16> %src128_1, <16 x i16> %src256_1) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 0, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:32 Lat:2 SizeLat:32 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 31>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 0, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:56 Lat:1 SizeLat:56 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 31>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Lane-wise blends of two i16 vectors from <2 x i16> up to <16 x i16>: every result lane keeps its position and comes from one of the two operands.
+  %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 0, i32 3>
+  %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+  %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 31>
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 0, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 31>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:64 Lat:2 SizeLat:64 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 32, i32 33, i32 34, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 61, i32 62, i32 63>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 0, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 31>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:112 Lat:1 SizeLat:112 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 32, i32 33, i32 34, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 61, i32 62, i32 63>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; Lane-wise blends of two i8 vectors from <2 x i8> up to <32 x i8>: every result lane keeps its position and comes from one of the two operands.
+  %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 0, i32 3>
+  %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 14, i32 15>
+  %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 31>
+  %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 32, i32 33, i32 34, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 61, i32 62, i32 63>
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-single-src.ll
new file mode 100644
index 0000000000000..603c73804de04
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-single-src.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:3 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:14 Lat:3 SizeLat:14 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256) {
+; LSX-LABEL: 'test_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:16 Lat:3 SizeLat:16 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64  = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+  %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+  %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:28 Lat:3 SizeLat:28 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64  = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+  %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:32 Lat:2 SizeLat:32 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:56 Lat:9 SizeLat:56 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V32  = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+  %V64  = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
+  %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+  %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:64 Lat:2 SizeLat:64 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:3 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:112 Lat:4 SizeLat:112 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V16  = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+  %V32  = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
+  %V64  = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
+  %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret void
+}
+
+
+define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256) {
+; LSX-LABEL: 'identity_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 0, i32 poison, i32 2, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of 0 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'identity_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 0, i32 poison, i32 2, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of 0 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 0, i32 poison, i32 2, i32 poison>
+  %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-splice.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-splice.ll
new file mode 100644
index 0000000000000..5a83423eb396b
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-splice.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 1, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 1, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:4 SizeLat:8 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 1, i32 2>
+  %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 1, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 1, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:14 Lat:4 SizeLat:14 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 1, i32 2>
+  %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256) {
+; LSX-LABEL: 'test_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 1, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:4 SizeLat:16 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 1, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:6 SizeLat:16 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 1, i32 2>
+  %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:4 for: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> <i32 1, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:3 SizeLat:8 for: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:16 Lat:6 SizeLat:16 for: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:4 for: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> <i32 1, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:3 SizeLat:8 for: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:28 Lat:6 SizeLat:28 for: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> <i32 1, i32 2>
+  %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:4 for: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> <i32 1, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:3 SizeLat:8 for: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:16 Lat:3 SizeLat:16 for: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; LSX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:32 Lat:6 SizeLat:32 for: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:4 for: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> <i32 1, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:3 SizeLat:8 for: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:16 Lat:3 SizeLat:16 for: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:56 Lat:6 SizeLat:56 for: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> <i32 1, i32 2>
+  %V64  = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+  %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:4 for: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> <i32 1, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:3 SizeLat:8 for: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:16 Lat:3 SizeLat:16 for: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+; LSX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:32 Lat:3 SizeLat:32 for: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
+; LSX-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:64 Lat:6 SizeLat:64 for: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:4 for: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> <i32 1, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:8 Lat:3 SizeLat:8 for: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:16 Lat:3 SizeLat:16 for: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+; LASX-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:32 Lat:3 SizeLat:32 for: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:112 Lat:6 SizeLat:112 for: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> <i32 1, i32 2>
+  %V32  = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  %V64  = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
+  %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-transpose.ll
new file mode 100644
index 0000000000000..506c03de9bcf3
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-transpose.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 0, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 0, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 0, i32 2>
+  %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 0, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 0, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:14 Lat:1 SizeLat:14 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 0, i32 2>
+  %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256) {
+; LSX-LABEL: 'test_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 0, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 0, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:16 Lat:1 SizeLat:16 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 0, i32 2>
+  %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> <i32 0, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> <i32 0, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:28 Lat:1 SizeLat:28 for: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> <i32 0, i32 2>
+  %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> <i32 0, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:2 SizeLat:32 for: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> <i32 0, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:56 Lat:1 SizeLat:56 for: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> <i32 0, i32 2>
+  %V64  = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> <i32 0, i32 2>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:64 Lat:2 SizeLat:64 for: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> <i32 0, i32 2>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:112 Lat:1 SizeLat:112 for: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> <i32 0, i32 2>
+  %V32  = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V64  = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/LoongArch/shuffle-two-src.ll
new file mode 100644
index 0000000000000..ca24d2e7b8de2
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/shuffle-two-src.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <2 x double> %src128_1, <4 x double> %src256_1) {
+; LSX-LABEL: 'test_vXf64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:8 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
+  %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <2 x i64> %src128_1, <4 x i64> %src256_1) {
+; LSX-LABEL: 'test_vXi64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:2 SizeLat:8 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:1 SizeLat:4 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:14 Lat:7 SizeLat:14 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
+  %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1) {
+; LSX-LABEL: 'test_vXf32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXf32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:7 SizeLat:16 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
+  %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+  %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1) {
+; LSX-LABEL: 'test_vXi32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:16 Lat:2 SizeLat:16 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:28 Lat:7 SizeLat:28 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
+  %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+  %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+  ret void
+}
+
+define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <2 x i16> %src32_1, <4 x i16> %src64_1, <8 x i16> %src128_1, <16 x i16> %src256_1) {
+; LSX-LABEL: 'test_vXi16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:32 Lat:2 SizeLat:32 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:56 Lat:9 SizeLat:56 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
+  %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+  %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+  %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+  ret void
+}
+
+define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1) {
+; LSX-LABEL: 'test_vXi8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 3, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:64 Lat:2 SizeLat:64 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'test_vXi8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:1 SizeLat:4 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 3, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:8 Lat:1 SizeLat:8 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:16 Lat:1 SizeLat:16 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:32 Lat:1 SizeLat:32 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:112 Lat:9 SizeLat:112 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 3, i32 0>
+  %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+  %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+  %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+  ret void
+}

>From 24c53814530630d2817fd902b368494f108cb16a Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Mon, 22 Dec 2025 17:20:20 +0800
Subject: [PATCH 8/8] Support getIntrinsicInstrCost

---
 .../LoongArchTargetTransformInfo.cpp          | 292 +++++++++++++++
 .../LoongArch/LoongArchTargetTransformInfo.h  |   4 +
 llvm/test/Analysis/CostModel/LoongArch/abs.ll | 172 +++++++++
 .../Analysis/CostModel/LoongArch/arith-fma.ll |  41 +++
 .../Analysis/CostModel/LoongArch/arith-fp.ll  |  46 ++-
 .../CostModel/LoongArch/arith-log2.ll         |  42 +++
 .../CostModel/LoongArch/arith-sminmax.ll      | 135 +++++++
 .../CostModel/LoongArch/arith-ssat.ll         | 135 +++++++
 .../CostModel/LoongArch/arith-uminmax.ll      | 135 +++++++
 .../CostModel/LoongArch/arith-usat.ll         | 135 +++++++
 .../CostModel/LoongArch/bitreverse.ll         | 175 +++++++++
 .../Analysis/CostModel/LoongArch/bswap.ll     |  72 ++++
 .../test/Analysis/CostModel/LoongArch/ctlz.ll | 318 +++++++++++++++++
 .../Analysis/CostModel/LoongArch/ctpop.ll     | 175 +++++++++
 .../test/Analysis/CostModel/LoongArch/cttz.ll | 331 ++++++++++++++++++
 15 files changed, 2200 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/abs.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-fma.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-log2.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-sminmax.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-ssat.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-uminmax.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/arith-usat.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/bitreverse.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/bswap.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/ctlz.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/ctpop.ll
 create mode 100644 llvm/test/Analysis/CostModel/LoongArch/cttz.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index b8ddca89434c0..1c8420a2ca262 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGenTypes/MachineValueType.h"
 #include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/InstructionCost.h"
 #include <optional>
@@ -884,6 +885,297 @@ InstructionCost LoongArchTTIImpl::getShuffleCost(
                                SubTp);
 }
 
+InstructionCost
+LoongArchTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                        TTI::TargetCostKind CostKind) const {
+  static const CostKindTblEntry LASXCostTable[] = { // {Lat, RecipThroughput}
+      {ISD::ABS, MVT::v32i8, {1, 2}},  // xvsigncov.b
+      {ISD::ABS, MVT::v16i16, {1, 2}}, // xvsigncov.h
+      {ISD::ABS, MVT::v8i32, {1, 2}},  // xvsigncov.w
+      {ISD::ABS, MVT::v4i64, {1, 2}},  // xvsigncov.d
+
+      {ISD::SADDSAT, MVT::v32i8, {1, 1}},  // xvsadd.b
+      {ISD::SADDSAT, MVT::v16i16, {1, 1}}, // xvsadd.h
+      {ISD::SADDSAT, MVT::v8i32, {1, 1}},  // xvsadd.w
+      {ISD::SADDSAT, MVT::v4i64, {1, 1}},  // xvsadd.d
+
+      {ISD::SSUBSAT, MVT::v32i8, {1, 1}},  // xvssub.b
+      {ISD::SSUBSAT, MVT::v16i16, {1, 1}}, // xvssub.h
+      {ISD::SSUBSAT, MVT::v8i32, {1, 1}},  // xvssub.w
+      {ISD::SSUBSAT, MVT::v4i64, {1, 1}},  // xvssub.d
+
+      {ISD::UADDSAT, MVT::v32i8, {1, 1}},  // xvsadd.bu
+      {ISD::UADDSAT, MVT::v16i16, {1, 1}}, // xvsadd.hu
+      {ISD::UADDSAT, MVT::v8i32, {1, 1}},  // xvsadd.wu
+      {ISD::UADDSAT, MVT::v4i64, {1, 1}},  // xvsadd.du
+
+      {ISD::USUBSAT, MVT::v32i8, {1, 1}},  // xvssub.bu
+      {ISD::USUBSAT, MVT::v16i16, {1, 1}}, // xvssub.hu
+      {ISD::USUBSAT, MVT::v8i32, {1, 1}},  // xvssub.wu
+      {ISD::USUBSAT, MVT::v4i64, {1, 1}},  // xvssub.du
+
+      {ISD::SMAX, MVT::v32i8, {1, 1}},  // xvmax.b
+      {ISD::SMAX, MVT::v16i16, {1, 1}}, // xvmax.h
+      {ISD::SMAX, MVT::v8i32, {1, 1}},  // xvmax.w
+      {ISD::SMAX, MVT::v4i64, {2, 1}},  // xvmax.d
+
+      {ISD::SMIN, MVT::v32i8, {1, 1}},  // xvmin.b
+      {ISD::SMIN, MVT::v16i16, {1, 1}}, // xvmin.h
+      {ISD::SMIN, MVT::v8i32, {1, 1}},  // xvmin.w
+      {ISD::SMIN, MVT::v4i64, {2, 1}},  // xvmin.d
+
+      {ISD::UMAX, MVT::v32i8, {1, 1}},  // xvmax.bu
+      {ISD::UMAX, MVT::v16i16, {1, 1}}, // xvmax.hu
+      {ISD::UMAX, MVT::v8i32, {1, 1}},  // xvmax.wu
+      {ISD::UMAX, MVT::v4i64, {2, 1}},  // xvmax.du
+
+      {ISD::UMIN, MVT::v32i8, {1, 1}},  // xvmin.bu
+      {ISD::UMIN, MVT::v16i16, {1, 1}}, // xvmin.hu
+      {ISD::UMIN, MVT::v8i32, {1, 1}},  // xvmin.wu
+      {ISD::UMIN, MVT::v4i64, {2, 1}},  // xvmin.du
+
+      {ISD::FMAXNUM, MVT::v8f32, {2, 1}}, // xvfmax.s
+      {ISD::FMAXNUM, MVT::v4f64, {2, 1}}, // xvfmax.d
+      {ISD::FMINNUM, MVT::v8f32, {2, 1}}, // xvfmin.s
+      {ISD::FMINNUM, MVT::v4f64, {2, 1}}, // xvfmin.d
+
+      {ISD::FLOG2, MVT::v8f32, {4, 1}}, // xvflogb.s
+      {ISD::FLOG2, MVT::v4f64, {4, 1}}, // xvflogb.d
+
+      {ISD::FMA, MVT::v8f32, {5, 2}}, // xvfmadd.s
+      {ISD::FMA, MVT::v4f64, {5, 2}}, // xvfmadd.d
+
+      {ISD::FSQRT, MVT::v8f32, {25, 28}}, // xvfsqrt.s
+      {ISD::FSQRT, MVT::v4f64, {22, 20}}, // xvfsqrt.d
+
+      {ISD::CTPOP, MVT::v32i8, {2, 2}},  // xvpcnt.b
+      {ISD::CTPOP, MVT::v16i16, {2, 2}}, // xvpcnt.h
+      {ISD::CTPOP, MVT::v8i32, {2, 2}},  // xvpcnt.w
+      {ISD::CTPOP, MVT::v4i64, {2, 2}},  // xvpcnt.d
+
+      {ISD::CTLZ, MVT::v32i8, {2, 1}},  // xvclz.b
+      {ISD::CTLZ, MVT::v16i16, {2, 1}}, // xvclz.h
+      {ISD::CTLZ, MVT::v8i32, {2, 1}},  // xvclz.w
+      {ISD::CTLZ, MVT::v4i64, {2, 1}},  // xvclz.d
+
+      {ISD::CTTZ, MVT::v32i8, {4, 4}},  // xvsubi.bu + xvandn.v + xvpcnt.b
+      {ISD::CTTZ, MVT::v16i16, {4, 4}}, // xvsubi.hu + xvandn.v + xvpcnt.h
+      {ISD::CTTZ, MVT::v8i32, {4, 4}},  // xvsubi.wu + xvandn.v + xvpcnt.w
+      {ISD::CTTZ, MVT::v4i64, {4, 4}},  // xvsubi.du + xvandn.v + xvpcnt.d
+
+      {ISD::BSWAP, MVT::v16i16, {1, 1}}, // xvshuf4i.b
+      {ISD::BSWAP, MVT::v8i32, {1, 1}},  // xvshuf4i.b
+      {ISD::BSWAP, MVT::v4i64, {2, 2}},  // xvshuf4i.b + xvshuf4i.w
+  };
+
+  static const CostKindTblEntry LSXCostTable[] = { // {Lat, RecipThroughput}
+      {ISD::ABS, MVT::v16i8, {1, 2}}, // vsigncov.b
+      {ISD::ABS, MVT::v8i16, {1, 2}}, // vsigncov.h
+      {ISD::ABS, MVT::v4i32, {1, 2}}, // vsigncov.w
+      {ISD::ABS, MVT::v2i64, {1, 2}}, // vsigncov.d
+
+      {ISD::SADDSAT, MVT::v16i8, {1, 1}}, // vsadd.b
+      {ISD::SADDSAT, MVT::v8i16, {1, 1}}, // vsadd.h
+      {ISD::SADDSAT, MVT::v4i32, {1, 1}}, // vsadd.w
+      {ISD::SADDSAT, MVT::v2i64, {1, 1}}, // vsadd.d
+
+      {ISD::SSUBSAT, MVT::v16i8, {1, 1}}, // vssub.b
+      {ISD::SSUBSAT, MVT::v8i16, {1, 1}}, // vssub.h
+      {ISD::SSUBSAT, MVT::v4i32, {1, 1}}, // vssub.w
+      {ISD::SSUBSAT, MVT::v2i64, {1, 1}}, // vssub.d
+
+      {ISD::UADDSAT, MVT::v16i8, {1, 1}}, // vsadd.bu
+      {ISD::UADDSAT, MVT::v8i16, {1, 1}}, // vsadd.hu
+      {ISD::UADDSAT, MVT::v4i32, {1, 1}}, // vsadd.wu
+      {ISD::UADDSAT, MVT::v2i64, {1, 1}}, // vsadd.du
+
+      {ISD::USUBSAT, MVT::v16i8, {1, 1}}, // vssub.bu
+      {ISD::USUBSAT, MVT::v8i16, {1, 1}}, // vssub.hu
+      {ISD::USUBSAT, MVT::v4i32, {1, 1}}, // vssub.wu
+      {ISD::USUBSAT, MVT::v2i64, {1, 1}}, // vssub.du
+
+      {ISD::SMAX, MVT::v16i8, {1, 1}}, // vmax.b
+      {ISD::SMAX, MVT::v8i16, {1, 1}}, // vmax.h
+      {ISD::SMAX, MVT::v4i32, {1, 1}}, // vmax.w
+      {ISD::SMAX, MVT::v2i64, {2, 1}}, // vmax.d
+
+      {ISD::SMIN, MVT::v16i8, {1, 1}}, // vmin.b
+      {ISD::SMIN, MVT::v8i16, {1, 1}}, // vmin.h
+      {ISD::SMIN, MVT::v4i32, {1, 1}}, // vmin.w
+      {ISD::SMIN, MVT::v2i64, {2, 1}}, // vmin.d
+
+      {ISD::UMAX, MVT::v16i8, {1, 1}}, // vmax.bu
+      {ISD::UMAX, MVT::v8i16, {1, 1}}, // vmax.hu
+      {ISD::UMAX, MVT::v4i32, {1, 1}}, // vmax.wu
+      {ISD::UMAX, MVT::v2i64, {2, 1}}, // vmax.du
+
+      {ISD::UMIN, MVT::v16i8, {1, 1}}, // vmin.bu
+      {ISD::UMIN, MVT::v8i16, {1, 1}}, // vmin.hu
+      {ISD::UMIN, MVT::v4i32, {1, 1}}, // vmin.wu
+      {ISD::UMIN, MVT::v2i64, {2, 1}}, // vmin.du
+
+      {ISD::FMAXNUM, MVT::v4f32, {2, 1}}, // vfmax.s
+      {ISD::FMAXNUM, MVT::v2f64, {2, 1}}, // vfmax.d
+      {ISD::FMINNUM, MVT::v4f32, {2, 1}}, // vfmin.s
+      {ISD::FMINNUM, MVT::v2f64, {2, 1}}, // vfmin.d
+
+      {ISD::FLOG2, MVT::v4f32, {4, 1}}, // vflogb.s
+      {ISD::FLOG2, MVT::v2f64, {4, 1}}, // vflogb.d
+
+      {ISD::FMA, MVT::v4f32, {5, 2}}, // vfmadd.s
+      {ISD::FMA, MVT::v2f64, {5, 2}}, // vfmadd.d
+
+      {ISD::FSQRT, MVT::v4f32, {25, 28}}, // vfsqrt.s
+      {ISD::FSQRT, MVT::v2f64, {22, 20}}, // vfsqrt.d
+
+      {ISD::CTPOP, MVT::v16i8, {2, 2}}, // vpcnt.b
+      {ISD::CTPOP, MVT::v8i16, {2, 2}}, // vpcnt.h
+      {ISD::CTPOP, MVT::v4i32, {2, 2}}, // vpcnt.w
+      {ISD::CTPOP, MVT::v2i64, {2, 2}}, // vpcnt.d
+
+      {ISD::CTLZ, MVT::v16i8, {2, 1}}, // vclz.b
+      {ISD::CTLZ, MVT::v8i16, {2, 1}}, // vclz.h
+      {ISD::CTLZ, MVT::v4i32, {2, 1}}, // vclz.w
+      {ISD::CTLZ, MVT::v2i64, {2, 1}}, // vclz.d
+
+      {ISD::CTTZ, MVT::v16i8, {4, 4}}, // vsubi.bu + vandn.v + vpcnt.b
+      {ISD::CTTZ, MVT::v8i16, {4, 4}}, // vsubi.hu + vandn.v + vpcnt.h
+      {ISD::CTTZ, MVT::v4i32, {4, 4}}, // vsubi.wu + vandn.v + vpcnt.w
+      {ISD::CTTZ, MVT::v2i64, {4, 4}}, // vsubi.du + vandn.v + vpcnt.d
+
+      {ISD::BSWAP, MVT::v8i16, {1, 1}}, // vshuf4i.b
+      {ISD::BSWAP, MVT::v4i32, {1, 1}}, // vshuf4i.b
+      {ISD::BSWAP, MVT::v2i64, {2, 2}}, // vshuf4i.b + vshuf4i.w
+  };
+
+  static const CostKindTblEntry LA64CostTable[] = { // {Lat, RecipThroughput}
+      {ISD::ABS, MVT::i8, {3, 3}},  // srai.d + xor + sub.d
+      {ISD::ABS, MVT::i16, {3, 3}}, // srai.d + xor + sub.d
+      {ISD::ABS, MVT::i32, {3, 3}}, // srai.d + xor + sub.d
+      {ISD::ABS, MVT::i64, {3, 3}}, // srai.d + xor + sub.d
+
+      {ISD::FMINNUM, MVT::f32, {2, 1}}, // fmin.s
+      {ISD::FMINNUM, MVT::f64, {2, 1}}, // fmin.d; NOTE(review): no FMAXNUM row?
+
+      {ISD::FLOG2, MVT::f32, {4, 1}}, // flogb.s
+      {ISD::FLOG2, MVT::f64, {4, 1}}, // flogb.d
+
+      {ISD::FMA, MVT::f32, {5, 2}}, // fmadd.s
+      {ISD::FMA, MVT::f64, {5, 2}}, // fmadd.d
+
+      {ISD::FSQRT, MVT::f32, {15, 9}},  // fsqrt.s
+      {ISD::FSQRT, MVT::f64, {22, 10}}, // fsqrt.d
+
+      {ISD::CTLZ, MVT::i8, {1, 1}},  // clz.b
+      {ISD::CTLZ, MVT::i16, {1, 1}}, // clz.h
+      {ISD::CTLZ, MVT::i32, {1, 1}}, // clz.w
+      {ISD::CTLZ, MVT::i64, {1, 1}}, // clz.d
+
+      {ISD::CTTZ, MVT::i8, {1, 1}},  // ctz.b
+      {ISD::CTTZ, MVT::i16, {1, 1}}, // ctz.h
+      {ISD::CTTZ, MVT::i32, {1, 1}}, // ctz.w
+      {ISD::CTTZ, MVT::i64, {1, 1}}, // ctz.d
+
+      {ISD::BITREVERSE, MVT::i8, {1, 1}},  // bitrev.4b
+      {ISD::BITREVERSE, MVT::i16, {2, 2}}, // bitrev.d + srli.d
+      {ISD::BITREVERSE, MVT::i32, {1, 1}}, // bitrev.w
+      {ISD::BITREVERSE, MVT::i64, {1, 1}}, // bitrev.d
+
+      {ISD::BSWAP, MVT::i16, {1, 1}}, // bswap.2h
+      {ISD::BSWAP, MVT::i32, {1, 1}}, // bswap.2w
+      {ISD::BSWAP, MVT::i64, {1, 1}}, // bswap.d
+  };
+
+  Type *RetTy = ICA.getReturnType();
+  Type *OpTy = RetTy;
+  Intrinsic::ID IID = ICA.getID();
+  unsigned ISD = ISD::DELETED_NODE; // stays so for unmapped intrinsics
+  switch (IID) {
+  default:
+    break;
+  case Intrinsic::abs:
+    ISD = ISD::ABS;
+    break;
+  case Intrinsic::sadd_sat:
+    ISD = ISD::SADDSAT;
+    break;
+  case Intrinsic::ssub_sat:
+    ISD = ISD::SSUBSAT;
+    break;
+  case Intrinsic::uadd_sat:
+    ISD = ISD::UADDSAT;
+    break;
+  case Intrinsic::usub_sat:
+    ISD = ISD::USUBSAT;
+    break;
+  case Intrinsic::smax:
+    ISD = ISD::SMAX;
+    break;
+  case Intrinsic::smin:
+    ISD = ISD::SMIN;
+    break;
+  case Intrinsic::umax:
+    ISD = ISD::UMAX;
+    break;
+  case Intrinsic::umin:
+    ISD = ISD::UMIN;
+    break;
+  case Intrinsic::maxnum:
+    ISD = ISD::FMAXNUM;
+    break;
+  case Intrinsic::minnum:
+    ISD = ISD::FMINNUM;
+    break;
+  case Intrinsic::log2:
+    ISD = ISD::FLOG2;
+    break;
+  case Intrinsic::fma:
+    ISD = ISD::FMA;
+    break;
+  case Intrinsic::sqrt:
+    ISD = ISD::FSQRT;
+    break;
+  case Intrinsic::ctlz:
+    ISD = ISD::CTLZ;
+    break;
+  case Intrinsic::ctpop:
+    ISD = ISD::CTPOP;
+    break;
+  case Intrinsic::cttz:
+    ISD = ISD::CTTZ;
+    break;
+  case Intrinsic::bitreverse:
+    ISD = ISD::BITREVERSE;
+    break;
+  case Intrinsic::bswap:
+    ISD = ISD::BSWAP;
+    break;
+  }
+
+  if (ISD != ISD::DELETED_NODE) {
+    // Probe LASX, then LSX, then LA64; a hit is scaled by LT.first.
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(OpTy);
+    MVT MTy = LT.second;
+
+    if (ST->hasExtLASX())
+      if (const auto *Entry = CostTableLookup(LASXCostTable, ISD, MTy))
+        if (auto KindCost = Entry->Cost[CostKind])
+          return LT.first * *KindCost;
+
+    if (ST->hasExtLSX())
+      if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, MTy))
+        if (auto KindCost = Entry->Cost[CostKind])
+          return LT.first * *KindCost;
+
+    if (ST->is64Bit())
+      if (const auto *Entry = CostTableLookup(LA64CostTable, ISD, MTy))
+        if (auto KindCost = Entry->Cost[CostKind])
+          return LT.first * *KindCost;
+  }
+
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind); // no table hit
+}
+
 bool LoongArchTTIImpl::prefersVectorizedAddressing() const { return false; }
 
 // TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 19604e9f772e4..0616a9a1cfe45 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -88,6 +88,10 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
                  VectorType *SubTp, ArrayRef<const Value *> Args = {},
                  const Instruction *CxtI = nullptr) const override;
 
+  InstructionCost
+  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                        TTI::TargetCostKind CostKind) const override;
+
   bool prefersVectorizedAddressing() const override;
 
   TTI::MemCmpExpansionOptions
diff --git a/llvm/test/Analysis/CostModel/LoongArch/abs.ll b/llvm/test/Analysis/CostModel/LoongArch/abs.ll
new file mode 100644
index 0000000000000..26ae4ab90df49
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/abs.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output < %s | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output < %s | FileCheck %s --check-prefixes=LASX
+
+define void @cost_abs_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
+; LSX-LABEL: 'cost_abs_i64'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i64'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I64    = call i64 @llvm.abs.i64(i64 %a64, i1 0)
+  %V2I64  = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 0)
+  %V4I64  = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 0)
+  ret void
+}
+
+define void @cost_abs_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
+; LSX-LABEL: 'cost_abs_i32'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i32'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I32   = call i32 @llvm.abs.i32(i32 %a32, i1 0)
+  %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 0)
+  %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 0)
+  ret void
+}
+
+define void @cost_abs_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
+; LSX-LABEL: 'cost_abs_i16'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i16'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I16    = call i16 @llvm.abs.i16(i16 %a16, i1 0)
+  %V8I16  = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 0)
+  %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 0)
+  ret void
+}
+
+define void @cost_abs_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
+; LSX-LABEL: 'cost_abs_i8'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i8'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I8    = call i8 @llvm.abs.i8(i8 %a8, i1 0)
+  %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 0)
+  %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 0)
+  ret void
+}
+
+
+define void @cost_abs_i64_poison(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
+; LSX-LABEL: 'cost_abs_i64_poison'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i64_poison'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I64    = call i64 @llvm.abs.i64(i64 %a64, i1 -1)
+  %V2I64  = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 -1)
+  %V4I64  = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 -1)
+  ret void
+}
+
+define void @cost_abs_i32_poison(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
+; LSX-LABEL: 'cost_abs_i32_poison'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i32_poison'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I32   = call i32 @llvm.abs.i32(i32 %a32, i1 -1)
+  %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 -1)
+  %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 -1)
+  ret void
+}
+
+define void @cost_abs_i16_poison(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
+; LSX-LABEL: 'cost_abs_i16_poison'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i16_poison'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I16    = call i16 @llvm.abs.i16(i16 %a16, i1 -1)
+  %V8I16  = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 -1)
+  %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 -1)
+  ret void
+}
+
+define void @cost_abs_i8_poison(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
+; LSX-LABEL: 'cost_abs_i8_poison'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_abs_i8_poison'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I8    = call i8 @llvm.abs.i8(i8 %a8, i1 -1)
+  %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 -1)
+  %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 -1)
+  ret void
+}
+
+declare i64 @llvm.abs.i64(i64, i1)
+declare i32 @llvm.abs.i32(i32, i1)
+declare i16 @llvm.abs.i16(i16, i1)
+declare  i8 @llvm.abs.i8(i8, i1)
+
+declare <2 x i64>  @llvm.abs.v2i64(<2 x i64>, i1)
+declare <4 x i32>  @llvm.abs.v4i32(<4 x i32>, i1)
+declare <8 x i16>  @llvm.abs.v8i16(<8 x i16>, i1)
+declare <16 x i8>  @llvm.abs.v16i8(<16 x i8>, i1)
+
+declare <4 x i64>  @llvm.abs.v4i64(<4 x i64>, i1)
+declare <8 x i32>  @llvm.abs.v8i32(<8 x i32>, i1)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+declare <32 x i8>  @llvm.abs.v32i8(<32 x i8>, i1)
+
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-fma.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-fma.ll
new file mode 100644
index 0000000000000..40051b5c26c0d
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-fma.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output < %s | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output < %s | FileCheck %s --check-prefixes=LASX
+
+define i32 @fma(i32 %arg) {
+; LSX-LABEL: 'fma'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:10 SizeLat:4 for: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:10 SizeLat:4 for: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; LASX-LABEL: 'fma'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+  %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
+  %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+  %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+
+  %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
+  %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+  %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+
+  ret i32 undef
+}
+
+declare float @llvm.fma.f32(float, float, float)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
+
+declare double @llvm.fma.f64(double, double, double)
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll
index 6da0d72d7a2be..35eeb6c06e4d8 100644
--- a/llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-fp.ll
@@ -11,7 +11,7 @@ define void @fadd() {
 ; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fadd double poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <2 x double> poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:6 SizeLat:1 for: %V4F64 = fadd <4 x double> poison, poison
-; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; LASX-LABEL: 'fadd'
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F32 = fadd float poison, poison
@@ -21,7 +21,7 @@ define void @fadd() {
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fadd double poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <2 x double> poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <4 x double> poison, poison
-; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
    %V1F32 = fadd float poison, poison
    %V2F32 = fadd <2 x float> poison, poison
@@ -44,7 +44,7 @@ define void @fsub() {
 ; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fsub double poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <2 x double> poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:6 SizeLat:1 for: %V4F64 = fsub <4 x double> poison, poison
-; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; LASX-LABEL: 'fsub'
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F32 = fsub float poison, poison
@@ -54,7 +54,7 @@ define void @fsub() {
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V1F64 = fsub double poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <2 x double> poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <4 x double> poison, poison
-; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
    %V1F32 = fsub float poison, poison
    %V2F32 = fsub <2 x float> poison, poison
@@ -77,7 +77,7 @@ define void @fmul() {
 ; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V1F64 = fmul double poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F64 = fmul <2 x double> poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:10 SizeLat:1 for: %V4F64 = fmul <4 x double> poison, poison
-; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; LASX-LABEL: 'fmul'
 ; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V1F32 = fmul float poison, poison
@@ -87,7 +87,7 @@ define void @fmul() {
 ; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V1F64 = fmul double poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V2F64 = fmul <2 x double> poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V4F64 = fmul <4 x double> poison, poison
-; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
    %V1F32 = fmul float poison, poison
    %V2F32 = fmul <2 x float> poison, poison
@@ -110,7 +110,7 @@ define void @fdiv() {
 ; LSX-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:12 SizeLat:4 for: %V1F64 = fdiv double poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:12 SizeLat:4 for: %V2F64 = fdiv <2 x double> poison, poison
 ; LSX-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:4 Lat:24 SizeLat:4 for: %V4F64 = fdiv <4 x double> poison, poison
-; LSX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; LASX-LABEL: 'fdiv'
 ; LASX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:9 SizeLat:4 for: %V1F32 = fdiv float poison, poison
@@ -120,7 +120,7 @@ define void @fdiv() {
 ; LASX-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:12 SizeLat:4 for: %V1F64 = fdiv double poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:12 SizeLat:4 for: %V2F64 = fdiv <2 x double> poison, poison
 ; LASX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:12 SizeLat:4 for: %V4F64 = fdiv <4 x double> poison, poison
-; LASX-NEXT:  Cost Model: Found costs of 1 for: ret void
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
    %V1F32 = fdiv float poison, poison
    %V2F32 = fdiv <2 x float> poison, poison
@@ -133,3 +133,33 @@ define void @fdiv() {
 
    ret void
 }
+
+define i32 @fsqrt(i32 %arg) { ; llvm.sqrt on scalar, 128-bit and 256-bit FP types
+; LSX-LABEL: 'fsqrt'
+; LSX-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:15 SizeLat:1 for: %F32 = call float @llvm.sqrt.f32(float poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:25 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:4 Lat:50 SizeLat:4 for: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:22 SizeLat:1 for: %F64 = call double @llvm.sqrt.f64(double poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:22 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:4 Lat:44 SizeLat:4 for: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'fsqrt'
+; LASX-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:15 SizeLat:1 for: %F32 = call float @llvm.sqrt.f32(float poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:25 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:25 SizeLat:1 for: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:22 SizeLat:1 for: %F64 = call double @llvm.sqrt.f64(double poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:22 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:22 SizeLat:1 for: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %F32 = call float @llvm.sqrt.f32(float poison)
+  %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> poison)
+  %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> poison)
+
+  %F64 = call double @llvm.sqrt.f64(double poison)
+  %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> poison)
+  %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> poison)
+
+  ret i32 poison
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-log2.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-log2.ll
new file mode 100644
index 0000000000000..5aa03720399ad
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-log2.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define i32 @log2(i32 %arg) { ; llvm.log2 on scalar, 128-bit and 256-bit FP types
+; LSX-LABEL: 'log2'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %F32 = call float @llvm.log2.f32(float poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.log2.v4f32(<4 x float> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:8 SizeLat:4 for: %V8F32 = call <8 x float> @llvm.log2.v8f32(<8 x float> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %F64 = call double @llvm.log2.f64(double poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.log2.v2f64(<2 x double> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:8 SizeLat:4 for: %V4F64 = call <4 x double> @llvm.log2.v4f64(<4 x double> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'log2'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %F32 = call float @llvm.log2.f32(float poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.log2.v4f32(<4 x float> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %V8F32 = call <8 x float> @llvm.log2.v8f32(<8 x float> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %F64 = call double @llvm.log2.f64(double poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.log2.v2f64(<2 x double> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %V4F64 = call <4 x double> @llvm.log2.v4f64(<4 x double> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %F32 = call float @llvm.log2.f32(float poison)
+  %V4F32 = call <4 x float> @llvm.log2.v4f32(<4 x float> poison)
+  %V8F32 = call <8 x float> @llvm.log2.v8f32(<8 x float> poison)
+
+  %F64 = call double @llvm.log2.f64(double poison)
+  %V2F64 = call <2 x double> @llvm.log2.v2f64(<2 x double> poison)
+  %V4F64 = call <4 x double> @llvm.log2.v4f64(<4 x double> poison)
+
+  ret i32 poison
+}
+
+declare float @llvm.log2.f32(float)
+declare <4 x float> @llvm.log2.v4f32(<4 x float>)
+declare <8 x float> @llvm.log2.v8f32(<8 x float>)
+
+declare double @llvm.log2.f64(double)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+declare <4 x double> @llvm.log2.v4f64(<4 x double>)
+
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-sminmax.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-sminmax.ll
new file mode 100644
index 0000000000000..815a50a99fa27
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-sminmax.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64        @llvm.smax.i64(i64, i64)
+declare <2 x i64>  @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.smax.i32(i32, i32)
+declare <4 x i32>  @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.smax.i16(i16, i16)
+declare <8 x i16>  @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.smax.i8(i8,  i8)
+declare <16 x i8>  @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @smax(i32 %arg) { ; llvm.smax on i8..i64 scalars and 128-/256-bit vectors
+; LSX-LABEL: 'smax'
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.smax.i64(i64 poison, i64 poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.smax.i32(i32 poison, i32 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.smax.i16(i16 poison, i16 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.smax.i8(i8 poison, i8 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'smax'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.smax.i64(i64 poison, i64 poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.smax.i32(i32 poison, i32 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.smax.i16(i16 poison, i16 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.smax.i8(i8 poison, i8 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %I64 = call i64 @llvm.smax.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> poison, <4 x i64> poison)
+
+  %I32 = call i32 @llvm.smax.i32(i32 poison, i32 poison)
+  %V4I32  = call <4 x i32>  @llvm.smax.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.smax.v8i32(<8 x i32> poison, <8 x i32> poison)
+
+  %I16 = call i16 @llvm.smax.i16(i16 poison, i16 poison)
+  %V8I16  = call <8 x i16>  @llvm.smax.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> poison, <16 x i16> poison)
+
+  %I8 = call i8 @llvm.smax.i8(i8 poison, i8 poison)
+  %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> poison, <32 x i8> poison)
+
+  ret i32 poison
+}
+
+declare i64        @llvm.smin.i64(i64, i64)
+declare <2 x i64>  @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.smin.i32(i32, i32)
+declare <4 x i32>  @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.smin.i16(i16, i16)
+declare <8 x i16>  @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.smin.i8(i8,  i8)
+declare <16 x i8>  @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @smin(i32 %arg) { ; llvm.smin on i8..i64 scalars and 128-/256-bit vectors
+; LSX-LABEL: 'smin'
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.smin.i64(i64 poison, i64 poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.smin.i32(i32 poison, i32 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.smin.i16(i16 poison, i16 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.smin.i8(i8 poison, i8 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'smin'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.smin.i64(i64 poison, i64 poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.smin.i32(i32 poison, i32 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.smin.i16(i16 poison, i16 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.smin.i8(i8 poison, i8 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %I64 = call i64 @llvm.smin.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> poison, <4 x i64> poison)
+
+  %I32 = call i32 @llvm.smin.i32(i32 poison, i32 poison)
+  %V4I32  = call <4 x i32>  @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.smin.v8i32(<8 x i32> poison, <8 x i32> poison)
+
+  %I16 = call i16 @llvm.smin.i16(i16 poison, i16 poison)
+  %V8I16  = call <8 x i16>  @llvm.smin.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> poison, <16 x i16> poison)
+
+  %I8 = call i8 @llvm.smin.i8(i8 poison, i8 poison)
+  %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> poison, <32 x i8> poison)
+
+  ret i32 poison
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-ssat.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-ssat.ll
new file mode 100644
index 0000000000000..6e70852da62c4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-ssat.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64        @llvm.sadd.sat.i64(i64, i64)
+declare <2 x i64>  @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.sadd.sat.i32(i32, i32)
+declare <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.sadd.sat.i16(i16, i16)
+declare <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.sadd.sat.i8(i8,  i8)
+declare <16 x i8>  @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @add(i32 %arg) { ; llvm.sadd.sat on i8..i64 scalars and 128-/256-bit vectors
+; LSX-LABEL: 'add'
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 poison, i64 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 poison, i32 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 poison, i16 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 poison, i8 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'add'
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 poison, i64 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 poison, i32 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 poison, i16 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 poison, i8 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %I64 = call i64 @llvm.sadd.sat.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+
+  %I32 = call i32 @llvm.sadd.sat.i32(i32 poison, i32 poison)
+  %V4I32  = call <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+
+  %I16 = call i16 @llvm.sadd.sat.i16(i16 poison, i16 poison)
+  %V8I16  = call <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+
+  %I8 = call i8 @llvm.sadd.sat.i8(i8 poison, i8 poison)
+  %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+
+  ret i32 poison
+}
+
+declare i64        @llvm.ssub.sat.i64(i64, i64)
+declare <2 x i64>  @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.ssub.sat.i32(i32, i32)
+declare <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.ssub.sat.i16(i16, i16)
+declare <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.ssub.sat.i8(i8,  i8)
+declare <16 x i8>  @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @sub(i32 %arg) { ; llvm.ssub.sat on i8..i64 scalars and 128-/256-bit vectors
+; LSX-LABEL: 'sub'
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 poison, i64 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 poison, i32 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 poison, i16 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LSX-NEXT:  Cost Model: Found costs of 7 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 poison, i8 poison)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; LASX-LABEL: 'sub'
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 poison, i64 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 poison, i32 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 poison, i16 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; LASX-NEXT:  Cost Model: Found costs of 7 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 poison, i8 poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %I64 = call i64 @llvm.ssub.sat.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+
+  %I32 = call i32 @llvm.ssub.sat.i32(i32 poison, i32 poison)
+  %V4I32  = call <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+
+  %I16 = call i16 @llvm.ssub.sat.i16(i16 poison, i16 poison)
+  %V8I16  = call <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+
+  %I8 = call i8 @llvm.ssub.sat.i8(i8 poison, i8 poison)
+  %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+
+  ret i32 poison
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-uminmax.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-uminmax.ll
new file mode 100644
index 0000000000000..4f7977cedf020
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-uminmax.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64        @llvm.umax.i64(i64, i64)
+declare <2 x i64>  @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.umax.i32(i32, i32)
+declare <4 x i32>  @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.umax.i16(i16, i16)
+declare <8 x i16>  @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.umax.i8(i8,  i8)
+declare <16 x i8>  @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @umax(i32 %arg) {
+; LSX-LABEL: 'umax'
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; LASX-LABEL: 'umax'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; Calls llvm.umax on undef operands for each element width (i64, i32, i16, i8) as a scalar, a 128-bit vector and a 256-bit vector; %arg is unused.
+  %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
+  %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+  %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+
+  %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+  %V4I32  = call <4 x i32>  @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+  %V8I32  = call <8 x i32>  @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+
+  %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+  %V8I16  = call <8 x i16>  @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+  %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+
+  %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+  %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+  %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+
+  ret i32 undef
+}
+
+declare i64        @llvm.umin.i64(i64, i64)
+declare <2 x i64>  @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.umin.i32(i32, i32)
+declare <4 x i32>  @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.umin.i16(i16, i16)
+declare <8 x i16>  @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.umin.i8(i8,  i8)
+declare <16 x i8>  @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @umin(i32 %arg) {
+; LSX-LABEL: 'umin'
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; LASX-LABEL: 'umin'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; Calls llvm.umin on undef operands for each element width (i64, i32, i16, i8) as a scalar, a 128-bit vector and a 256-bit vector; %arg is unused.
+  %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
+  %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+  %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+
+  %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
+  %V4I32  = call <4 x i32>  @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+  %V8I32  = call <8 x i32>  @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+
+  %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
+  %V8I16  = call <8 x i16>  @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+  %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+
+  %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
+  %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+  %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+
+  ret i32 undef
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/arith-usat.ll b/llvm/test/Analysis/CostModel/LoongArch/arith-usat.ll
new file mode 100644
index 0000000000000..daf9dca361900
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/arith-usat.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64        @llvm.uadd.sat.i64(i64, i64)
+declare <2 x i64>  @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.uadd.sat.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.uadd.sat.i32(i32, i32)
+declare <4 x i32>  @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.uadd.sat.i16(i16, i16)
+declare <8 x i16>  @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.uadd.sat.i8(i8,  i8)
+declare <16 x i8>  @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @add(i32 %arg) {
+; LSX-LABEL: 'add'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; LASX-LABEL: 'add'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; Calls llvm.uadd.sat on undef operands for each element width (i64, i32, i16, i8) as a scalar, a 128-bit vector and a 256-bit vector; %arg is unused.
+  %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+  %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+  %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+
+  %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
+  %V4I32  = call <4 x i32>  @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+  %V8I32  = call <8 x i32>  @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+
+  %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
+  %V8I16  = call <8 x i16>  @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+  %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+
+  %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
+  %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+  %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+
+  ret i32 undef
+}
+
+declare i64        @llvm.usub.sat.i64(i64, i64)
+declare <2 x i64>  @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64>  @llvm.usub.sat.v4i64(<4 x i64>, <4 x i64>)
+
+declare i32        @llvm.usub.sat.i32(i32, i32)
+declare <4 x i32>  @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32>  @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>)
+
+declare i16        @llvm.usub.sat.i16(i16, i16)
+declare <8 x i16>  @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)
+
+declare i8         @llvm.usub.sat.i8(i8,  i8)
+declare <16 x i8>  @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <32 x i8>  @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)
+
+define i32 @sub(i32 %arg) {
+; LSX-LABEL: 'sub'
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LSX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; LASX-LABEL: 'sub'
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; LASX-NEXT:  Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; Calls llvm.usub.sat on undef operands for each element width (i64, i32, i16, i8) as a scalar, a 128-bit vector and a 256-bit vector; %arg is unused.
+  %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+  %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+  %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+
+  %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
+  %V4I32  = call <4 x i32>  @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+  %V8I32  = call <8 x i32>  @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+
+  %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
+  %V8I16  = call <8 x i16>  @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+  %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+
+  %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
+  %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+  %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+
+  ret i32 undef
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/bitreverse.ll b/llvm/test/Analysis/CostModel/LoongArch/bitreverse.ll
new file mode 100644
index 0000000000000..e87243c7c0326
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/bitreverse.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64 @llvm.bitreverse.i64(i64)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i16 @llvm.bitreverse.i16(i16)
+declare  i8 @llvm.bitreverse.i8(i8)
+
+define i64 @var_bitreverse_i64(i64 %a) {
+; LSX-LABEL: 'var_bitreverse_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %bitreverse
+; Applies llvm.bitreverse to the scalar i64 argument and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a)
+  ret i64 %bitreverse
+}
+
+define i32 @var_bitreverse_i32(i32 %a) {
+; LSX-LABEL: 'var_bitreverse_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %bitreverse
+; Applies llvm.bitreverse to the scalar i32 argument and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a)
+  ret i32 %bitreverse
+}
+
+define i16 @var_bitreverse_i16(i16 %a) {
+; LSX-LABEL: 'var_bitreverse_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %bitreverse
+; Applies llvm.bitreverse to the scalar i16 argument and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a)
+  ret i16 %bitreverse
+}
+
+define i8 @var_bitreverse_i8(i8 %a) {
+; LSX-LABEL: 'var_bitreverse_i8'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_i8'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %bitreverse
+; Applies llvm.bitreverse to the scalar i8 argument and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a)
+  ret i8 %bitreverse
+}
+
+
+declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
+declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
+declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
+declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
+
+declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
+declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
+declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
+declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
+
+define <2 x i64> @var_bitreverse_v2i64(<2 x i64> %a) {
+; LSX-LABEL: 'var_bitreverse_v2i64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:6 Lat:14 SizeLat:6 for: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v2i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:6 Lat:14 SizeLat:6 for: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bitreverse
+; Applies llvm.bitreverse to a <2 x i64> (128-bit) vector and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
+  ret <2 x i64> %bitreverse
+}
+
+define <4 x i64> @var_bitreverse_v4i64(<4 x i64> %a) {
+; LSX-LABEL: 'var_bitreverse_v4i64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:12 Lat:28 SizeLat:12 for: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v4i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:42 CodeSize:18 Lat:34 SizeLat:18 for: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bitreverse
+; Applies llvm.bitreverse to a <4 x i64> (256-bit) vector. NOTE(review): the LASX expectation above is higher than the LSX one (RThru 42 vs 36) - confirm this is intended.
+  %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
+  ret <4 x i64> %bitreverse
+}
+
+define <4 x i32> @var_bitreverse_v4i32(<4 x i32> %a) {
+; LSX-LABEL: 'var_bitreverse_v4i32'
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v4i32'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bitreverse
+; Applies llvm.bitreverse to a <4 x i32> (128-bit) vector and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
+  ret <4 x i32> %bitreverse
+}
+
+define <8 x i32> @var_bitreverse_v8i32(<8 x i32> %a) {
+; LSX-LABEL: 'var_bitreverse_v8i32'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v8i32'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bitreverse
+; Applies llvm.bitreverse to a <8 x i32> (256-bit) vector and returns the result; the expected cost above is 4 with LSX and 2 with LASX.
+  %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
+  ret <8 x i32> %bitreverse
+}
+
+define <8 x i16> @var_bitreverse_v8i16(<8 x i16> %a) {
+; LSX-LABEL: 'var_bitreverse_v8i16'
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v8i16'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bitreverse
+; Applies llvm.bitreverse to a <8 x i16> (128-bit) vector and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
+  ret <8 x i16> %bitreverse
+}
+
+define <16 x i16> @var_bitreverse_v16i16(<16 x i16> %a) {
+; LSX-LABEL: 'var_bitreverse_v16i16'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v16i16'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bitreverse
+; Applies llvm.bitreverse to a <16 x i16> (256-bit) vector and returns the result; the expected cost above is 4 with LSX and 2 with LASX.
+  %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
+  ret <16 x i16> %bitreverse
+}
+
+define <16 x i8> @var_bitreverse_v16i8(<16 x i8> %a) {
+; LSX-LABEL: 'var_bitreverse_v16i8'
+; LSX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v16i8'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bitreverse
+; Applies llvm.bitreverse to a <16 x i8> (128-bit) vector and returns the result; the LSX and LASX expectations above are the same.
+  %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
+  ret <16 x i8> %bitreverse
+}
+
+define <32 x i8> @var_bitreverse_v32i8(<32 x i8> %a) {
+; LSX-LABEL: 'var_bitreverse_v32i8'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bitreverse
+;
+; LASX-LABEL: 'var_bitreverse_v32i8'
+; LASX-NEXT:  Cost Model: Found costs of 2 for: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bitreverse
+; Applies llvm.bitreverse to a <32 x i8> (256-bit) vector and returns the result; the expected cost above is 4 with LSX and 2 with LASX.
+  %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
+  ret <32 x i8> %bitreverse
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/bswap.ll b/llvm/test/Analysis/CostModel/LoongArch/bswap.ll
new file mode 100644
index 0000000000000..28cad3c138542
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/bswap.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
+; LSX-LABEL: 'cost_bswap_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_bswap_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I64    = call i64 @llvm.bswap.i64(i64 %a64)
+  %V2I64  = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+  %V4I64  = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+  ret void
+}
+
+define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
+; LSX-LABEL: 'cost_bswap_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_bswap_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I32   = call i32 @llvm.bswap.i32(i32 %a32)
+  %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+  %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+  ret void
+}
+
+define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
+; LSX-LABEL: 'cost_bswap_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:2 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; LASX-LABEL: 'cost_bswap_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %I16    = call i16 @llvm.bswap.i16(i16 %a16)
+  %V8I16  = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+  %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+  ret void
+}
+
+declare i64 @llvm.bswap.i64(i64)
+declare i32 @llvm.bswap.i32(i32)
+declare i16 @llvm.bswap.i16(i16)
+
+declare <2 x i64>  @llvm.bswap.v2i64(<2 x i64>)
+declare <4 x i32>  @llvm.bswap.v4i32(<4 x i32>)
+declare <8 x i16>  @llvm.bswap.v8i16(<8 x i16>)
+
+declare <4 x i64>  @llvm.bswap.v4i64(<4 x i64>)
+declare <8 x i32>  @llvm.bswap.v8i32(<8 x i32>)
+declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
diff --git a/llvm/test/Analysis/CostModel/LoongArch/ctlz.ll b/llvm/test/Analysis/CostModel/LoongArch/ctlz.ll
new file mode 100644
index 0000000000000..bae2bd67af513
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/ctlz.ll
@@ -0,0 +1,318 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare  i8 @llvm.ctlz.i8(i8, i1)
+
+define i64 @var_ctlz_i64(i64 %a) {
+; LSX-LABEL: 'var_ctlz_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %ctlz
+;
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0)
+  ret i64 %ctlz
+}
+
+define i64 @var_ctlz_i64u(i64 %a) {
+; LSX-LABEL: 'var_ctlz_i64u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i64u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %ctlz
+;
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1)
+  ret i64 %ctlz
+}
+
+define i32 @var_ctlz_i32(i32 %a) {
+; LSX-LABEL: 'var_ctlz_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %ctlz
+;
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
+  ret i32 %ctlz
+}
+
+define i32 @var_ctlz_i32u(i32 %a) {
+; LSX-LABEL: 'var_ctlz_i32u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i32u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %ctlz
+;
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1)
+  ret i32 %ctlz
+}
+
+define i16 @var_ctlz_i16(i16 %a) {
+; LSX-LABEL: 'var_ctlz_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %ctlz
+;
+  %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0)
+  ret i16 %ctlz
+}
+
+define i16 @var_ctlz_i16u(i16 %a) {
+; LSX-LABEL: 'var_ctlz_i16u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i16u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %ctlz
+;
+  %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1)
+  ret i16 %ctlz
+}
+
+define i8 @var_ctlz_i8(i8 %a) {
+; LSX-LABEL: 'var_ctlz_i8'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i8'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %ctlz
+;
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0)
+  ret i8 %ctlz
+}
+
+define i8 @var_ctlz_i8u(i8 %a) {
+; LSX-LABEL: 'var_ctlz_i8u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %ctlz
+;
+; LASX-LABEL: 'var_ctlz_i8u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %ctlz
+;
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1)
+  ret i8 %ctlz
+}
+
+
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
+
+declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
+declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
+declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
+declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
+
+define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
+; LSX-LABEL: 'var_ctlz_v2i64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v2i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctlz
+;
+  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
+  ret <2 x i64> %ctlz
+}
+
+define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
+; LSX-LABEL: 'var_ctlz_v2i64u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v2i64u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctlz
+;
+  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
+  ret <2 x i64> %ctlz
+}
+
+define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
+; LSX-LABEL: 'var_ctlz_v4i64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v4i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctlz
+;
+  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
+  ret <4 x i64> %ctlz
+}
+
+define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
+; LSX-LABEL: 'var_ctlz_v4i64u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v4i64u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctlz
+;
+  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
+  ret <4 x i64> %ctlz
+}
+
+define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) {
+; LSX-LABEL: 'var_ctlz_v8i64u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:8 Lat:8 SizeLat:8 for: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v8i64u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %ctlz
+;
+  %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 1)
+  ret <8 x i64> %ctlz
+}
+
+define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
+; LSX-LABEL: 'var_ctlz_v4i32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v4i32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctlz
+;
+  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
+  ret <4 x i32> %ctlz
+}
+
+define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
+; LSX-LABEL: 'var_ctlz_v4i32u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v4i32u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctlz
+;
+  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
+  ret <4 x i32> %ctlz
+}
+
+define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
+; LSX-LABEL: 'var_ctlz_v8i16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v8i16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctlz
+;
+  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
+  ret <8 x i16> %ctlz
+}
+
+define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
+; LSX-LABEL: 'var_ctlz_v8i16u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v8i16u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctlz
+;
+  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
+  ret <8 x i16> %ctlz
+}
+
+define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
+; LSX-LABEL: 'var_ctlz_v16i16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v16i16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctlz
+;
+  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
+  ret <16 x i16> %ctlz
+}
+
+define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
+; LSX-LABEL: 'var_ctlz_v16i16u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v16i16u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctlz
+;
+  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
+  ret <16 x i16> %ctlz
+}
+
+define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
+; LSX-LABEL: 'var_ctlz_v16i8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v16i8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %ctlz
+;
+  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
+  ret <16 x i8> %ctlz
+}
+
+define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
+; LSX-LABEL: 'var_ctlz_v16i8u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v16i8u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %ctlz
+;
+  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
+  ret <16 x i8> %ctlz
+}
+
+define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
+; LSX-LABEL: 'var_ctlz_v32i8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v32i8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctlz
+;
+  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
+  ret <32 x i8> %ctlz
+}
+
+define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
+; LSX-LABEL: 'var_ctlz_v32i8u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctlz
+;
+; LASX-LABEL: 'var_ctlz_v32i8u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctlz
+;
+  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
+  ret <32 x i8> %ctlz
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/ctpop.ll b/llvm/test/Analysis/CostModel/LoongArch/ctpop.ll
new file mode 100644
index 0000000000000..7f0ca297b83b5
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/ctpop.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64 @llvm.ctpop.i64(i64)
+declare i32 @llvm.ctpop.i32(i32)
+declare i16 @llvm.ctpop.i16(i16)
+declare  i8 @llvm.ctpop.i8(i8)
+
+define i64 @var_ctpop_i64(i64 %a) {
+; LSX-LABEL: 'var_ctpop_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %ctpop
+;
+; LASX-LABEL: 'var_ctpop_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %ctpop
+;
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+  ret i64 %ctpop
+}
+
+define i32 @var_ctpop_i32(i32 %a) {
+; LSX-LABEL: 'var_ctpop_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %ctpop
+;
+; LASX-LABEL: 'var_ctpop_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %ctpop
+;
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %ctpop
+}
+
+define i16 @var_ctpop_i16(i16 %a) {
+; LSX-LABEL: 'var_ctpop_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %ctpop
+;
+; LASX-LABEL: 'var_ctpop_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %ctpop
+;
+  %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+  ret i16 %ctpop
+}
+
+define i8 @var_ctpop_i8(i8 %a) {
+; LSX-LABEL: 'var_ctpop_i8'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %ctpop
+;
+; LASX-LABEL: 'var_ctpop_i8'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %ctpop
+;
+  %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+  ret i8 %ctpop
+}
+
+
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
+
+declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
+declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
+declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
+
+define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
+; LSX-LABEL: 'var_ctpop_v2i64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v2i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctpop
+;
+  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+  ret <2 x i64> %ctpop
+}
+
+define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
+; LSX-LABEL: 'var_ctpop_v4i64'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v4i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctpop
+;
+  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+  ret <4 x i64> %ctpop
+}
+
+define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
+; LSX-LABEL: 'var_ctpop_v4i32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v4i32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %ctpop
+;
+  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+  ret <4 x i32> %ctpop
+}
+
+define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
+; LSX-LABEL: 'var_ctpop_v8i32'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v8i32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %ctpop
+;
+  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+  ret <8 x i32> %ctpop
+}
+
+define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
+; LSX-LABEL: 'var_ctpop_v8i16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v8i16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %ctpop
+;
+  %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+  ret <8 x i16> %ctpop
+}
+
+define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
+; LSX-LABEL: 'var_ctpop_v16i16'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v16i16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %ctpop
+;
+  %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+  ret <16 x i16> %ctpop
+}
+
+define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
+; LSX-LABEL: 'var_ctpop_v16i8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v16i8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %ctpop
+;
+  %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+  ret <16 x i8> %ctpop
+}
+
+define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
+; LSX-LABEL: 'var_ctpop_v32i8'
+; LSX-NEXT:  Cost Model: Found costs of 4 for: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctpop
+;
+; LASX-LABEL: 'var_ctpop_v32i8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %ctpop
+;
+  %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+  ret <32 x i8> %ctpop
+}
diff --git a/llvm/test/Analysis/CostModel/LoongArch/cttz.ll b/llvm/test/Analysis/CostModel/LoongArch/cttz.ll
new file mode 100644
index 0000000000000..caae613f41c81
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/LoongArch/cttz.ll
@@ -0,0 +1,331 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lsx 2>&1 -disable-output | FileCheck %s --check-prefixes=LSX
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -mtriple=loongarch64 -mattr=+lasx 2>&1 -disable-output | FileCheck %s --check-prefixes=LASX
+
+declare i64 @llvm.cttz.i64(i64, i1) ; scalar cttz overloads; the trailing i1 is the is_zero_poison flag (per LangRef)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare  i8 @llvm.cttz.i8(i8, i1)
+
+define i64 @var_cttz_i64(i64 %a) { ; cost-model check: scalar i64 cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_i64'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %cttz
+;
+; LASX-LABEL: 'var_cttz_i64'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %cttz
+;
+  %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0)
+  ret i64 %cttz
+}
+
+define i64 @var_cttz_i64u(i64 %a) { ; cost-model check: scalar i64 cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_i64u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %cttz
+;
+; LASX-LABEL: 'var_cttz_i64u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %cttz
+;
+  %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1)
+  ret i64 %cttz
+}
+
+define i32 @var_cttz_i32(i32 %a) { ; cost-model check: scalar i32 cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_i32'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %cttz
+;
+; LASX-LABEL: 'var_cttz_i32'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %cttz
+;
+  %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
+  ret i32 %cttz
+}
+
+define i32 @var_cttz_i32u(i32 %a) { ; cost-model check: scalar i32 cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_i32u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %cttz
+;
+; LASX-LABEL: 'var_cttz_i32u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %cttz
+;
+  %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1)
+  ret i32 %cttz
+}
+
+define i16 @var_cttz_i16(i16 %a) { ; cost-model check: scalar i16 cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_i16'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %cttz
+;
+; LASX-LABEL: 'var_cttz_i16'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %cttz
+;
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0)
+  ret i16 %cttz
+}
+
+define i16 @var_cttz_i16u(i16 %a) { ; cost-model check: scalar i16 cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_i16u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %cttz
+;
+; LASX-LABEL: 'var_cttz_i16u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %cttz
+;
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1)
+  ret i16 %cttz
+}
+
+define i8 @var_cttz_i8(i8 %a) { ; cost-model check: scalar i8 cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_i8'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %cttz
+;
+; LASX-LABEL: 'var_cttz_i8'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %cttz
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0)
+  ret i8 %cttz
+}
+
+define i8 @var_cttz_i8u(i8 %a) { ; cost-model check: scalar i8 cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_i8u'
+; LSX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %cttz
+;
+; LASX-LABEL: 'var_cttz_i8u'
+; LASX-NEXT:  Cost Model: Found costs of 1 for: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %cttz
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1)
+  ret i8 %cttz
+}
+
+
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) ; 128-bit vector cttz overloads
+declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
+declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
+declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
+
+declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) ; 256-bit vector cttz overloads
+declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
+declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
+declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
+
+define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { ; cost-model check: <2 x i64> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v2i64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:6 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %cttz
+;
+; LASX-LABEL: 'var_cttz_v2i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:6 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %cttz
+;
+  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
+  ret <2 x i64> %cttz
+}
+
+define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { ; cost-model check: <2 x i64> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v2i64u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:6 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %cttz
+;
+; LASX-LABEL: 'var_cttz_v2i64u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:6 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %cttz
+;
+  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
+  ret <2 x i64> %cttz
+}
+
+define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { ; cost-model check: <4 x i64> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v4i64'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:12 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %cttz
+;
+; LASX-LABEL: 'var_cttz_v4i64'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:18 Lat:4 SizeLat:18 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %cttz
+;
+  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
+  ret <4 x i64> %cttz
+}
+
+define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { ; cost-model check: <4 x i64> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v4i64u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:12 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %cttz
+;
+; LASX-LABEL: 'var_cttz_v4i64u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:18 Lat:4 SizeLat:18 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %cttz
+;
+  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
+  ret <4 x i64> %cttz
+}
+
+define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { ; cost-model check: <4 x i32> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v4i32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:12 Lat:4 SizeLat:12 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %cttz
+;
+; LASX-LABEL: 'var_cttz_v4i32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:12 Lat:4 SizeLat:12 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %cttz
+;
+  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
+  ret <4 x i32> %cttz
+}
+
+define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { ; cost-model check: <4 x i32> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v4i32u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:12 Lat:4 SizeLat:12 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %cttz
+;
+; LASX-LABEL: 'var_cttz_v4i32u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:12 Lat:4 SizeLat:12 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %cttz
+;
+  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
+  ret <4 x i32> %cttz
+}
+
+define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { ; cost-model check: <8 x i32> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v8i32'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:24 Lat:8 SizeLat:24 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %cttz
+;
+; LASX-LABEL: 'var_cttz_v8i32'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:36 Lat:4 SizeLat:36 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %cttz
+;
+  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
+  ret <8 x i32> %cttz
+}
+
+define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { ; cost-model check: <8 x i32> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v8i32u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:24 Lat:8 SizeLat:24 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %cttz
+;
+; LASX-LABEL: 'var_cttz_v8i32u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:36 Lat:4 SizeLat:36 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %cttz
+;
+  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
+  ret <8 x i32> %cttz
+}
+
+define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { ; cost-model check: <8 x i16> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v8i16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:24 Lat:4 SizeLat:24 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %cttz
+;
+; LASX-LABEL: 'var_cttz_v8i16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:24 Lat:4 SizeLat:24 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %cttz
+;
+  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
+  ret <8 x i16> %cttz
+}
+
+define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { ; cost-model check: <8 x i16> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v8i16u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:24 Lat:4 SizeLat:24 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %cttz
+;
+; LASX-LABEL: 'var_cttz_v8i16u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:24 Lat:4 SizeLat:24 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %cttz
+;
+  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
+  ret <8 x i16> %cttz
+}
+
+define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { ; cost-model check: <16 x i16> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v16i16'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:48 Lat:8 SizeLat:48 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %cttz
+;
+; LASX-LABEL: 'var_cttz_v16i16'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:72 Lat:4 SizeLat:72 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %cttz
+;
+  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
+  ret <16 x i16> %cttz
+}
+
+define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { ; cost-model check: <16 x i16> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v16i16u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:48 Lat:8 SizeLat:48 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %cttz
+;
+; LASX-LABEL: 'var_cttz_v16i16u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:72 Lat:4 SizeLat:72 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %cttz
+;
+  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
+  ret <16 x i16> %cttz
+}
+
+define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { ; cost-model check: <16 x i8> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v16i8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:48 Lat:4 SizeLat:48 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %cttz
+;
+; LASX-LABEL: 'var_cttz_v16i8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:48 Lat:4 SizeLat:48 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %cttz
+;
+  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
+  ret <16 x i8> %cttz
+}
+
+define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { ; cost-model check: <16 x i8> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v16i8u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:48 Lat:4 SizeLat:48 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %cttz
+;
+; LASX-LABEL: 'var_cttz_v16i8u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:48 Lat:4 SizeLat:48 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %cttz
+;
+  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
+  ret <16 x i8> %cttz
+}
+
+define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { ; cost-model check: <32 x i8> cttz, is_zero_poison = false (i1 0)
+; LSX-LABEL: 'var_cttz_v32i8'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:96 Lat:8 SizeLat:96 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %cttz
+;
+; LASX-LABEL: 'var_cttz_v32i8'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:144 Lat:4 SizeLat:144 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %cttz
+;
+  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
+  ret <32 x i8> %cttz
+}
+
+define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { ; cost-model check: <32 x i8> cttz, is_zero_poison = true (i1 1)
+; LSX-LABEL: 'var_cttz_v32i8u'
+; LSX-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:96 Lat:8 SizeLat:96 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; LSX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %cttz
+;
+; LASX-LABEL: 'var_cttz_v32i8u'
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:144 Lat:4 SizeLat:144 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; LASX-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %cttz
+;
+  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
+  ret <32 x i8> %cttz
+}



More information about the llvm-commits mailing list