[llvm-branch-commits] [llvm] [LoongArch] Perform DAG combine for MUL to generate `[x]vmulw{ev/od}` (PR #161368)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 9 04:43:34 PDT 2025
https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/161368
>From ba68f214a72e8867718e7624b62b21c32e19a98d Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 30 Sep 2025 20:53:06 +0800
Subject: [PATCH 1/3] [LoongArch] Perform DAG combine for MUL to generate
`[x]vmulw{ev/od}`
---
.../LoongArch/LoongArchISelLowering.cpp | 118 +
.../Target/LoongArch/LoongArchISelLowering.h | 11 +-
.../LoongArch/LoongArchLASXInstrInfo.td | 41 +
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 48 +
.../lasx/ir-instruction/mulwev_od.ll | 2268 +----------------
.../LoongArch/lsx/ir-instruction/mulwev_od.ll | 186 +-
6 files changed, 373 insertions(+), 2299 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7ddf996f53f4c..2763cef394620 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -462,6 +462,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasExtLSX()) {
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BITCAST);
+ setTargetDAGCombine(ISD::MUL);
}
// Set DAG combine for 'LASX' feature.
@@ -6679,6 +6680,115 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT ResTy = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (ResTy != MVT::v8i16 && ResTy != MVT::v4i32 && ResTy != MVT::v2i64 &&
+      ResTy != MVT::v16i16 && ResTy != MVT::v8i32 &&
+      ResTy != MVT::v4i64)
+    return SDValue();
+
+  // Combine:
+  //   ti,tii,...,tx = extract_vector_elt t0, {0,2,4,.../1,3,5,...}
+  //   tj,tjj,...,ty = extract_vector_elt t1, {0,2,4,.../1,3,5,...}
+  //   tm = BUILD_VECTOR ti,tii,...,tx
+  //   tn = BUILD_VECTOR tj,tjj,...,ty
+  //   ta = {sign/zero}_extend tm
+  //   tb = {sign/zero}_extend tn
+  //   tr = mul ta, tb
+  // to:
+  //   tr = VMULW{EV/OD}[U/US] t0, t1
+  // Classify the extend pair: 0 = s*s, 1 = u*u, 2 = u*s, 3 = s*u.
+  // Must return a signed int so the -1 "no match" sentinel is detectable.
+  auto getExtType = [](unsigned Op0, unsigned Op1) -> int {
+    if (Op0 == ISD::SIGN_EXTEND && Op1 == ISD::SIGN_EXTEND)
+      return 0;
+    if (Op0 == ISD::ZERO_EXTEND && Op1 == ISD::ZERO_EXTEND)
+      return 1;
+    if (Op0 == ISD::ZERO_EXTEND && Op1 == ISD::SIGN_EXTEND)
+      return 2;
+    if (Op0 == ISD::SIGN_EXTEND && Op1 == ISD::ZERO_EXTEND)
+      return 3;
+    return -1;
+  };
+
+  int ExtType = getExtType(N0.getOpcode(), N1.getOpcode());
+  if (ExtType < 0)
+    return SDValue();
+
+  SDValue BV0 = N0.getOperand(0);
+  SDValue BV1 = N1.getOperand(0);
+  if (BV0.getOpcode() != ISD::BUILD_VECTOR ||
+      BV1.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  // The widened result element must be exactly twice the source element.
+  unsigned ResBits = ResTy.getScalarType().getSizeInBits();
+  unsigned BV0Bits = BV0.getValueType().getScalarType().getSizeInBits();
+  unsigned BV1Bits = BV1.getValueType().getScalarType().getSizeInBits();
+  if (BV0Bits != BV1Bits || ResBits != BV0Bits * 2)
+    return SDValue();
+
+  unsigned Index = 0;
+  SDValue OrigN0, OrigN1;
+  for (unsigned i = 0; i < BV0.getNumOperands(); ++i) {
+    SDValue Op0 = BV0.getOperand(i);
+    SDValue Op1 = BV1.getOperand(i);
+    // Each element of BUILD_VECTOR must be EXTRACT_VECTOR_ELT.
+    if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    // Both sides must extract the same lane from their source vector.
+    if (Op0.getOperand(1) != Op1.getOperand(1))
+      return SDValue();
+
+    auto *IdxC = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+    if (!IdxC)
+      return SDValue();
+    unsigned CurIdx = IdxC->getZExtValue();
+
+    if (i == 0) {
+      // First lane selects even (0,2,4,...) or odd (1,3,5,...) extraction.
+      if (CurIdx != 0 && CurIdx != 1)
+        return SDValue();
+      OrigN0 = Op0.getOperand(0);
+      OrigN1 = Op1.getOperand(0);
+    } else {
+      // Subsequent lanes must step by 2 and come from the same sources.
+      if (CurIdx != Index + 2)
+        return SDValue();
+      if (Op0.getOperand(0) != OrigN0 || Op1.getOperand(0) != OrigN1)
+        return SDValue();
+    }
+    Index = CurIdx;
+  }
+
+  if (OrigN0.getValueType() != OrigN1.getValueType())
+    return SDValue();
+  if (OrigN0.getValueType().getVectorNumElements() !=
+      ResTy.getVectorNumElements() * 2)
+    return SDValue();
+
+  SDValue Result;
+  EVT OrigTy = OrigN0.getValueType();
+  bool IsEven = (Index % 2 == 0);
+
+  static const unsigned OpcTable[3][2] = {
+      {LoongArchISD::VMULWOD, LoongArchISD::VMULWEV},
+      {LoongArchISD::VMULWODU, LoongArchISD::VMULWEVU},
+      {LoongArchISD::VMULWODUS, LoongArchISD::VMULWEVUS}};
+
+  // VMULW*US takes (unsigned, signed); for s*u (ExtType 3) swap operands.
+  if (ExtType == 3)
+    Result = DAG.getNode(OpcTable[2][IsEven], DL, OrigTy, OrigN1, OrigN0);
+  else
+    Result = DAG.getNode(OpcTable[ExtType][IsEven], DL, OrigTy, OrigN0, OrigN1);
+
+  return DAG.getBitcast(ResTy, Result);
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6714,6 +6824,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+ case ISD::MUL:
+ return performMULCombine(N, DAG, DCI);
}
return SDValue();
}
@@ -7526,6 +7638,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(XVMSKEQZ)
NODE_NAME_CASE(XVMSKNEZ)
NODE_NAME_CASE(VHADDW)
+ NODE_NAME_CASE(VMULWEV)
+ NODE_NAME_CASE(VMULWOD)
+ NODE_NAME_CASE(VMULWEVU)
+ NODE_NAME_CASE(VMULWODU)
+ NODE_NAME_CASE(VMULWEVUS)
+ NODE_NAME_CASE(VMULWODUS)
}
#undef NODE_NAME_CASE
return nullptr;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c7..1e5632eb00f7b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -189,7 +189,16 @@ enum NodeType : unsigned {
XVMSKNEZ,
// Vector Horizontal Addition with Widening
- VHADDW
+ VHADDW,
+
+ // Perform element-wise vector multiplication at even/odd indices,
+ // and keep each result in its corresponding widened slot
+ VMULWEV,
+ VMULWOD,
+ VMULWEVU,
+ VMULWODU,
+ VMULWEVUS,
+ VMULWODUS
// Intrinsic operations end =============================================
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 5143d53bad719..7c28efd88ae09 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1328,6 +1328,39 @@ multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
}
}
+multiclass XVmulwPat<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_Q_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass XVmulwuPat<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H_BU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W_HU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D_WU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_Q_DU") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass XVmulwusPat<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H_BU_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W_HU_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D_WU_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_Q_DU_D") LASX256:$xj, LASX256:$xk)>;
+}
+
let Predicates = [HasExtLASX] in {
// XVADD_{B/H/W/D}
@@ -1365,6 +1398,14 @@ defm : PatXrXr<mul, "XVMUL">;
defm : PatXrXr<mulhs, "XVMUH">;
defm : PatXrXrU<mulhu, "XVMUH">;
+// XVMULW{EV/OD}_{H_B/W_H/D_W/Q_D}[U], XVMULW{EV/OD}_{H_BU_B/W_HU_H/D_WU_W/Q_DU_D}
+defm : XVmulwPat<loongarch_vmulwev, "XVMULWEV">;
+defm : XVmulwPat<loongarch_vmulwod, "XVMULWOD">;
+defm : XVmulwuPat<loongarch_vmulwevu, "XVMULWEV">;
+defm : XVmulwuPat<loongarch_vmulwodu, "XVMULWOD">;
+defm : XVmulwusPat<loongarch_vmulwevus, "XVMULWEV">;
+defm : XVmulwusPat<loongarch_vmulwodus, "XVMULWOD">;
+
// XVMADD_{B/H/W/D}
defm : PatXrXrXr<muladd, "XVMADD">;
// XVMSUB_{B/H/W/D}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 8d1dc99e316c9..e34f6d7e58610 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -82,6 +82,13 @@ def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>;
+def loongarch_vmulwev: SDNode<"LoongArchISD::VMULWEV", SDT_LoongArchV2R>;
+def loongarch_vmulwod: SDNode<"LoongArchISD::VMULWOD", SDT_LoongArchV2R>;
+def loongarch_vmulwevu: SDNode<"LoongArchISD::VMULWEVU", SDT_LoongArchV2R>;
+def loongarch_vmulwodu: SDNode<"LoongArchISD::VMULWODU", SDT_LoongArchV2R>;
+def loongarch_vmulwevus: SDNode<"LoongArchISD::VMULWEVUS", SDT_LoongArchV2R>;
+def loongarch_vmulwodus: SDNode<"LoongArchISD::VMULWODUS", SDT_LoongArchV2R>;
+
def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
@@ -1518,6 +1525,39 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
}
}
+multiclass VmulwPat<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_Q_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass VmulwuPat<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H_BU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W_HU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D_WU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_Q_DU") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass VmulwusPat<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H_BU_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W_HU_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D_WU_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_Q_DU_D") LSX128:$vj, LSX128:$vk)>;
+}
+
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -1555,6 +1595,14 @@ defm : PatVrVr<mul, "VMUL">;
defm : PatVrVr<mulhs, "VMUH">;
defm : PatVrVrU<mulhu, "VMUH">;
+// VMULW{EV/OD}_{H_B/W_H/D_W/Q_D}[U], VMULW{EV/OD}_{H_BU_B/W_HU_H/D_WU_W/Q_DU_D}
+defm : VmulwPat<loongarch_vmulwev, "VMULWEV">;
+defm : VmulwPat<loongarch_vmulwod, "VMULWOD">;
+defm : VmulwuPat<loongarch_vmulwevu, "VMULWEV">;
+defm : VmulwuPat<loongarch_vmulwodu, "VMULWOD">;
+defm : VmulwusPat<loongarch_vmulwevus, "VMULWEV">;
+defm : VmulwusPat<loongarch_vmulwodus, "VMULWOD">;
+
// VMADD_{B/H/W/D}
defm : PatVrVrVr<muladd, "VMADD">;
// VMSUB_{B/H/W/D}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
index c8796b839913c..605325f4dc4f4 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
@@ -5,109 +5,9 @@
define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -127,59 +27,7 @@ define void @vmulwev_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 6
-; CHECK-NEXT: vpickve2gr.h $a2, $vr2, 4
-; CHECK-NEXT: vpickve2gr.h $a3, $vr2, 2
-; CHECK-NEXT: vpickve2gr.h $a4, $vr2, 0
-; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 6
-; CHECK-NEXT: vpickve2gr.h $a6, $vr0, 4
-; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 2
-; CHECK-NEXT: vpickve2gr.h $t0, $vr0, 0
-; CHECK-NEXT: xvpermi.d $xr0, $xr1, 14
-; CHECK-NEXT: vpickve2gr.h $t1, $vr0, 6
-; CHECK-NEXT: vpickve2gr.h $t2, $vr0, 4
-; CHECK-NEXT: vpickve2gr.h $t3, $vr0, 2
-; CHECK-NEXT: vpickve2gr.h $t4, $vr0, 0
-; CHECK-NEXT: vpickve2gr.h $t5, $vr1, 6
-; CHECK-NEXT: vpickve2gr.h $t6, $vr1, 4
-; CHECK-NEXT: vpickve2gr.h $t7, $vr1, 2
-; CHECK-NEXT: vpickve2gr.h $t8, $vr1, 0
-; CHECK-NEXT: ext.w.h $t0, $t0
-; CHECK-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; CHECK-NEXT: ext.w.h $a7, $a7
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; CHECK-NEXT: ext.w.h $a6, $a6
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; CHECK-NEXT: ext.w.h $a5, $a5
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; CHECK-NEXT: ext.w.h $a4, $a4
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; CHECK-NEXT: ext.w.h $a3, $a3
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; CHECK-NEXT: ext.w.h $a2, $a2
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; CHECK-NEXT: ext.w.h $a1, $a1
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ext.w.h $a1, $t8
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t7
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t6
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t5
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: ext.w.h $a1, $t4
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t3
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t2
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t1
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -195,73 +43,13 @@ entry:
}
define void @vmulwev_d_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: srai.w $a1, $t0, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 2
-; LA32-NEXT: srai.w $a1, $a7, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a6, 0
-; LA32-NEXT: srai.w $a1, $a6, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a5, 2
-; LA32-NEXT: srai.w $a1, $a5, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -423,109 +211,9 @@ entry:
define void @vmulwod_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -545,59 +233,7 @@ define void @vmulwod_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 7
-; CHECK-NEXT: vpickve2gr.h $a2, $vr2, 5
-; CHECK-NEXT: vpickve2gr.h $a3, $vr2, 3
-; CHECK-NEXT: vpickve2gr.h $a4, $vr2, 1
-; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 7
-; CHECK-NEXT: vpickve2gr.h $a6, $vr0, 5
-; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 3
-; CHECK-NEXT: vpickve2gr.h $t0, $vr0, 1
-; CHECK-NEXT: xvpermi.d $xr0, $xr1, 14
-; CHECK-NEXT: vpickve2gr.h $t1, $vr0, 7
-; CHECK-NEXT: vpickve2gr.h $t2, $vr0, 5
-; CHECK-NEXT: vpickve2gr.h $t3, $vr0, 3
-; CHECK-NEXT: vpickve2gr.h $t4, $vr0, 1
-; CHECK-NEXT: vpickve2gr.h $t5, $vr1, 7
-; CHECK-NEXT: vpickve2gr.h $t6, $vr1, 5
-; CHECK-NEXT: vpickve2gr.h $t7, $vr1, 3
-; CHECK-NEXT: vpickve2gr.h $t8, $vr1, 1
-; CHECK-NEXT: ext.w.h $t0, $t0
-; CHECK-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; CHECK-NEXT: ext.w.h $a7, $a7
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; CHECK-NEXT: ext.w.h $a6, $a6
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; CHECK-NEXT: ext.w.h $a5, $a5
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; CHECK-NEXT: ext.w.h $a4, $a4
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; CHECK-NEXT: ext.w.h $a3, $a3
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; CHECK-NEXT: ext.w.h $a2, $a2
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; CHECK-NEXT: ext.w.h $a1, $a1
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ext.w.h $a1, $t8
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t7
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t6
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t5
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: ext.w.h $a1, $t4
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t3
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t2
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t1
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -613,73 +249,13 @@ entry:
}
define void @vmulwod_d_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: srai.w $a1, $t0, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 2
-; LA32-NEXT: srai.w $a1, $a7, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a6, 0
-; LA32-NEXT: srai.w $a1, $a6, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a5, 2
-; LA32-NEXT: srai.w $a1, $a5, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -841,109 +417,9 @@ entry:
define void @vmulwev_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -959,125 +435,13 @@ entry:
}
define void @vmulwev_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_w_hu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_w_hu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -1091,63 +455,13 @@ entry:
}
define void @vmulwev_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_wu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvld $xr2, $a2, 0
-; LA32-NEXT: xvori.b $xr3, $xr1, 0
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr4, $xr0, 2
-; LA32-NEXT: xvinsve0.w $xr3, $xr4, 2
-; LA32-NEXT: xvpickve.w $xr4, $xr0, 4
-; LA32-NEXT: xvinsve0.w $xr3, $xr4, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 6
-; LA32-NEXT: xvinsve0.w $xr1, $xr2, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr2, 2
-; LA32-NEXT: xvinsve0.w $xr1, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr2, 4
-; LA32-NEXT: xvinsve0.w $xr1, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr2, 6
-; LA32-NEXT: xvinsve0.w $xr1, $xr0, 6
-; LA32-NEXT: xvmul.d $xr0, $xr3, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_wu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -1255,109 +569,9 @@ entry:
define void @vmulwod_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_bu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1373,125 +587,13 @@ entry:
}
define void @vmulwod_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_w_hu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_w_hu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -1505,65 +607,13 @@ entry:
}
define void @vmulwod_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_wu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: xvpickve.w $xr3, $xr0, 1
-; LA32-NEXT: xvori.b $xr4, $xr2, 0
-; LA32-NEXT: xvinsve0.w $xr4, $xr3, 0
-; LA32-NEXT: xvpickve.w $xr3, $xr0, 3
-; LA32-NEXT: xvinsve0.w $xr4, $xr3, 2
-; LA32-NEXT: xvpickve.w $xr3, $xr0, 5
-; LA32-NEXT: xvinsve0.w $xr4, $xr3, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA32-NEXT: xvinsve0.w $xr4, $xr0, 6
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 1
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 3
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 5
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 7
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 6
-; LA32-NEXT: xvmul.d $xr0, $xr4, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_wu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -1671,109 +721,9 @@ entry:
define void @vmulwev_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1789,125 +739,13 @@ entry:
}
define void @vmulwev_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_w_hu_h:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: ext.w.h $a1, $t8
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: ext.w.h $a1, $t4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_w_hu_h:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: ext.w.h $a1, $t8
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t7
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t6
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t5
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: ext.w.h $a1, $t4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t3
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t2
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t1
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_w_hu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -1921,68 +759,13 @@ entry:
}
define void @vmulwev_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_wu_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvld $xr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA32-NEXT: xvrepli.b $xr0, 0
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 0
-; LA32-NEXT: xvpickve.w $xr2, $xr1, 2
-; LA32-NEXT: xvinsve0.w $xr0, $xr2, 2
-; LA32-NEXT: xvpickve.w $xr2, $xr1, 4
-; LA32-NEXT: xvinsve0.w $xr0, $xr2, 4
-; LA32-NEXT: xvpickve.w $xr1, $xr1, 6
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvmul.d $xr0, $xr0, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_wu_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_wu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -2122,109 +905,9 @@ entry:
define void @vmulwod_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_bu_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -2240,125 +923,13 @@ entry:
}
define void @vmulwod_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_w_hu_h:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: ext.w.h $a1, $t8
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: ext.w.h $a1, $t4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_w_hu_h:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: ext.w.h $a1, $t8
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t7
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t6
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t5
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: ext.w.h $a1, $t4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t3
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t2
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t1
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_w_hu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -2372,69 +943,13 @@ entry:
}
define void @vmulwod_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_wu_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvld $xr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 1
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 3
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 5
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 7
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: xvmul.d $xr0, $xr2, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_wu_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_wu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -2574,109 +1089,9 @@ entry:
define void @vmulwev_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu_b_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr1, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -2692,125 +1107,13 @@ entry:
}
define void @vmulwev_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_w_hu_h_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA32-NEXT: ext.w.h $t0, $t0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: ext.w.h $a7, $a7
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: ext.w.h $a6, $a6
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: ext.w.h $a5, $a5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: ext.w.h $a1, $a1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_w_hu_h_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA64-NEXT: ext.w.h $t0, $t0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: ext.w.h $a7, $a7
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: ext.w.h $a6, $a6
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: ext.w.h $a5, $a5
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: ext.w.h $a4, $a4
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: ext.w.h $a3, $a3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: ext.w.h $a2, $a2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_w_hu_h_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -2824,68 +1127,13 @@ entry:
}
define void @vmulwev_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_wu_w_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvrepli.b $xr0, 0
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 0
-; LA32-NEXT: xvpickve.w $xr3, $xr1, 2
-; LA32-NEXT: xvinsve0.w $xr0, $xr3, 2
-; LA32-NEXT: xvpickve.w $xr3, $xr1, 4
-; LA32-NEXT: xvinsve0.w $xr0, $xr3, 4
-; LA32-NEXT: xvpickve.w $xr1, $xr1, 6
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 6
-; LA32-NEXT: xvmul.d $xr0, $xr2, $xr0
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_wu_w_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_wu_w_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -3025,109 +1273,9 @@ entry:
define void @vmulwod_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_bu_b_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr1, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -3143,125 +1291,13 @@ entry:
}
define void @vmulwod_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_w_hu_h_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA32-NEXT: ext.w.h $t0, $t0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: ext.w.h $a7, $a7
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: ext.w.h $a6, $a6
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: ext.w.h $a5, $a5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: ext.w.h $a1, $a1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_w_hu_h_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA64-NEXT: ext.w.h $t0, $t0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: ext.w.h $a7, $a7
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: ext.w.h $a6, $a6
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: ext.w.h $a5, $a5
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: ext.w.h $a4, $a4
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: ext.w.h $a3, $a3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: ext.w.h $a2, $a2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_w_hu_h_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -3275,69 +1311,13 @@ entry:
}
define void @vmulwod_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_wu_w_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 1
-; LA32-NEXT: xvrepli.b $xr3, 0
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 3
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 5
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 7
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 6
-; LA32-NEXT: xvmul.d $xr0, $xr2, $xr3
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_wu_w_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_wu_w_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
index cd83c1dff652f..19b5ab50eef95 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
@@ -7,11 +7,7 @@ define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vslli.h $vr1, $vr1, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -31,11 +27,7 @@ define void @vmulwev_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -55,11 +47,7 @@ define void @vmulwev_d_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.d.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -155,13 +143,7 @@ define void @vmulwod_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 49
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vshuf4i.b $vr1, $vr1, 49
-; CHECK-NEXT: vslli.h $vr1, $vr1, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -181,13 +163,7 @@ define void @vmulwod_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 49
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vshuf4i.h $vr1, $vr1, 49
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -207,13 +183,7 @@ define void @vmulwod_d_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 49
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 49
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -308,13 +278,8 @@ define void @vmulwev_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr3, $vr0, $vr1
-; CHECK-NEXT: vshuf.b $vr1, $vr3, $vr2, $vr1
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -332,15 +297,9 @@ entry:
define void @vmulwev_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_w_hu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vori.b $vr4, $vr0, 0
-; CHECK-NEXT: vshuf.h $vr4, $vr3, $vr1
-; CHECK-NEXT: vshuf.h $vr0, $vr3, $vr2
-; CHECK-NEXT: vmul.w $vr0, $vr4, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -358,15 +317,9 @@ entry:
define void @vmulwev_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_d_wu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vori.b $vr4, $vr0, 0
-; CHECK-NEXT: vshuf.w $vr4, $vr3, $vr1
-; CHECK-NEXT: vshuf.w $vr0, $vr3, $vr2
-; CHECK-NEXT: vmul.d $vr0, $vr4, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -442,10 +395,7 @@ define void @vmulwod_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vpackod.b $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -465,10 +415,7 @@ define void @vmulwod_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vpackod.h $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -488,10 +435,7 @@ define void @vmulwod_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vpackod.w $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -566,14 +510,8 @@ define void @vmulwev_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr3, $vr0, $vr1
-; CHECK-NEXT: vslli.h $vr1, $vr2, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -592,14 +530,8 @@ define void @vmulwev_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_w_hu_h:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr3, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr2, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -618,14 +550,8 @@ define void @vmulwev_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_d_wu_w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI18_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI18_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr3, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr2, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -714,12 +640,7 @@ define void @vmulwod_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vshuf4i.b $vr1, $vr1, 49
-; CHECK-NEXT: vslli.h $vr1, $vr1, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -739,12 +660,7 @@ define void @vmulwod_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vshuf4i.h $vr1, $vr1, 49
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -764,12 +680,7 @@ define void @vmulwod_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 49
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -858,13 +769,7 @@ define void @vmulwev_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI24_0)
-; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI24_0)
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.b $vr1, $vr3, $vr1, $vr2
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -884,13 +789,7 @@ define void @vmulwev_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0)
-; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI25_0)
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.h $vr2, $vr3, $vr1
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr2
+; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -910,13 +809,7 @@ define void @vmulwev_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI26_0)
-; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI26_0)
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.w $vr2, $vr3, $vr1
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr2
+; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1005,12 +898,7 @@ define void @vmulwod_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 49
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.b $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1030,12 +918,7 @@ define void @vmulwod_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 49
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.h $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1055,12 +938,7 @@ define void @vmulwod_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 49
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.w $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
>From c0bb775e00274cbbd8ddbfd2d5e2b8677c9f2caa Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Thu, 9 Oct 2025 15:59:19 +0800
Subject: [PATCH 2/3] deal with lsx i128
---
.../LoongArch/LoongArchISelLowering.cpp | 56 ++-
.../LoongArch/lsx/ir-instruction/mulwev_od.ll | 452 ++++--------------
2 files changed, 130 insertions(+), 378 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 2763cef394620..032032874cd11 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6691,15 +6691,15 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
if (ResTy != MVT::v8i16 && ResTy != MVT::v4i32 && ResTy != MVT::v2i64 &&
- ResTy != MVT::v16i16 && ResTy != MVT::v8i32 &&
- ResTy != MVT::v4i64) // && ResTy != MVT::v2i128)
+ ResTy != MVT::v16i16 && ResTy != MVT::v8i32 && ResTy != MVT::v4i64 &&
+ ResTy != MVT::i128)
return SDValue();
// Combine:
// ti,tii,...,tx = extract_vector_elt t0, {0,2,4,.../1,3,5,...}
// tj,tjj,...,ty = extract_vector_elt t1, {0,2,4,.../1,3,5,...}
- // tm = BUILD_VECTOR ti,tii,...,tx
- // tn = BUILD_VECTOR tj,tjj,...,ty
+ // tm = BUILD_VECTOR ti,tii,...,tx (Only when ResTy != MVT::i128)
+ // tn = BUILD_VECTOR tj,tjj,...,ty (Only when ResTy != MVT::i128)
// ta = {sign/zero}_extend tm
// tb = {sign/zero}_extend tn
// tr = mul ta, tb
@@ -6721,24 +6721,36 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
if (ExtType < 0)
return SDValue();
- SDValue BV0 = N0.getOperand(0);
- SDValue BV1 = N1.getOperand(0);
- if (BV0.getOpcode() != ISD::BUILD_VECTOR ||
- BV1.getOpcode() != ISD::BUILD_VECTOR)
+ SDValue Src0 = N0.getOperand(0);
+ SDValue Src1 = N1.getOperand(0);
+ bool IsScalar = (ResTy == MVT::i128);
+ if (IsScalar && (Src0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Src1.getOpcode() != ISD::EXTRACT_VECTOR_ELT))
+ return SDValue();
+ if (!IsScalar && (Src0.getOpcode() != ISD::BUILD_VECTOR ||
+ Src1.getOpcode() != ISD::BUILD_VECTOR))
return SDValue();
- unsigned ResBits = ResTy.getScalarType().getSizeInBits();
- unsigned BV0Bits = BV0.getValueType().getScalarType().getSizeInBits();
- unsigned BV1Bits = BV1.getValueType().getScalarType().getSizeInBits();
- if (BV0Bits != BV1Bits || ResBits != BV0Bits * 2)
+ unsigned ResBits = ResTy.getScalarSizeInBits();
+ unsigned Src0Bits = Src0.getValueType().getScalarSizeInBits();
+ unsigned Src1Bits = Src1.getValueType().getScalarSizeInBits();
+ if (Src0Bits != Src1Bits || ResBits != Src0Bits * 2)
return SDValue();
+ // Collect all EXTRACT_VECTOR_ELT.
+ SmallVector<std::pair<SDValue, SDValue>> Elems;
+ if (IsScalar) {
+ Elems.emplace_back(Src0, Src1);
+ } else {
+ for (unsigned i = 0; i < Src0.getNumOperands(); ++i)
+ Elems.emplace_back(Src0.getOperand(i), Src1.getOperand(i));
+ }
+
unsigned Index;
SDValue OrigN0, OrigN1;
- for (unsigned i = 0; i < BV0.getNumOperands(); ++i) {
- SDValue Op0 = BV0.getOperand(i);
- SDValue Op1 = BV1.getOperand(i);
- // Each element of BUILD_VECTOR must be EXTRACT_VECTOR_ELT.
+ bool First = true;
+ for (auto &[Op0, Op1] : Elems) {
+ // Each element must be EXTRACT_VECTOR_ELT.
if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
@@ -6750,17 +6762,17 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
auto *IdxC = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
if (!IdxC)
return SDValue();
- unsigned CurIdx = IdxC->getZExtValue();
- if (i == 0) {
+ unsigned CurIdx = IdxC->getZExtValue();
+ if (First) {
if (CurIdx != 0 && CurIdx != 1)
return SDValue();
OrigN0 = Op0.getOperand(0);
OrigN1 = Op1.getOperand(0);
+ First = false;
} else {
- if (CurIdx != Index + 2)
- return SDValue();
- if (Op0.getOperand(0) != OrigN0 || Op1.getOperand(0) != OrigN1)
+ if (CurIdx != Index + 2 || Op0.getOperand(0) != OrigN0 ||
+ Op1.getOperand(0) != OrigN1)
return SDValue();
}
Index = CurIdx;
@@ -6769,7 +6781,7 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
if (OrigN0.getValueType() != OrigN1.getValueType())
return SDValue();
if (OrigN0.getValueType().getVectorNumElements() !=
- ResTy.getVectorNumElements() * 2)
+ (IsScalar ? 1 : ResTy.getVectorNumElements()) * 2)
return SDValue();
SDValue Result;
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
index 19b5ab50eef95..3036fcfa49fec 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
@@ -67,64 +67,23 @@ define void @vmulwev_q_d(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vmulwev.q.d $vr0, $vr0, $vr1
; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 0
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 1
-; LA32-NEXT: srai.w $a5, $a2, 31
-; LA32-NEXT: srai.w $a6, $a4, 31
-; LA32-NEXT: mulh.wu $a7, $a1, $a3
-; LA32-NEXT: mul.w $t0, $a2, $a3
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: sltu $t0, $a7, $t0
-; LA32-NEXT: mulh.wu $t1, $a2, $a3
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: mul.w $t1, $a1, $a4
-; LA32-NEXT: add.w $a7, $t1, $a7
-; LA32-NEXT: sltu $t1, $a7, $t1
-; LA32-NEXT: mulh.wu $t2, $a1, $a4
-; LA32-NEXT: add.w $t1, $t2, $t1
-; LA32-NEXT: add.w $t1, $t0, $t1
-; LA32-NEXT: mul.w $t2, $a2, $a4
-; LA32-NEXT: add.w $t3, $t2, $t1
-; LA32-NEXT: mul.w $t4, $a3, $a5
-; LA32-NEXT: mul.w $t5, $a6, $a1
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: add.w $t7, $t3, $t6
-; LA32-NEXT: sltu $t8, $t7, $t3
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: sltu $t0, $t1, $t0
-; LA32-NEXT: mulh.wu $t1, $a2, $a4
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: add.w $t0, $t0, $t2
-; LA32-NEXT: mulh.wu $t1, $a3, $a5
-; LA32-NEXT: add.w $t1, $t1, $t4
-; LA32-NEXT: mul.w $a4, $a4, $a5
-; LA32-NEXT: add.w $a4, $t1, $a4
-; LA32-NEXT: mul.w $a2, $a6, $a2
-; LA32-NEXT: mulh.wu $a5, $a6, $a1
-; LA32-NEXT: add.w $a2, $a5, $a2
-; LA32-NEXT: add.w $a2, $a2, $t5
-; LA32-NEXT: add.w $a2, $a2, $a4
-; LA32-NEXT: sltu $a4, $t6, $t5
-; LA32-NEXT: add.w $a2, $a2, $a4
-; LA32-NEXT: add.w $a2, $t0, $a2
-; LA32-NEXT: add.w $a2, $a2, $t8
-; LA32-NEXT: mul.w $a1, $a1, $a3
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 4
-; LA32-NEXT: st.w $t7, $a0, 8
-; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwev_q_d:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 0
-; LA64-NEXT: ld.d $a2, $a2, 0
-; LA64-NEXT: mul.d $a3, $a1, $a2
-; LA64-NEXT: mulh.d $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 8
-; LA64-NEXT: st.d $a3, $a0, 0
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwev.q.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
@@ -203,64 +162,23 @@ define void @vmulwod_q_d(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vpickve2gr.w $a1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 2
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 3
-; LA32-NEXT: srai.w $a5, $a2, 31
-; LA32-NEXT: srai.w $a6, $a4, 31
-; LA32-NEXT: mulh.wu $a7, $a1, $a3
-; LA32-NEXT: mul.w $t0, $a2, $a3
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: sltu $t0, $a7, $t0
-; LA32-NEXT: mulh.wu $t1, $a2, $a3
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: mul.w $t1, $a1, $a4
-; LA32-NEXT: add.w $a7, $t1, $a7
-; LA32-NEXT: sltu $t1, $a7, $t1
-; LA32-NEXT: mulh.wu $t2, $a1, $a4
-; LA32-NEXT: add.w $t1, $t2, $t1
-; LA32-NEXT: add.w $t1, $t0, $t1
-; LA32-NEXT: mul.w $t2, $a2, $a4
-; LA32-NEXT: add.w $t3, $t2, $t1
-; LA32-NEXT: mul.w $t4, $a3, $a5
-; LA32-NEXT: mul.w $t5, $a6, $a1
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: add.w $t7, $t3, $t6
-; LA32-NEXT: sltu $t8, $t7, $t3
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: sltu $t0, $t1, $t0
-; LA32-NEXT: mulh.wu $t1, $a2, $a4
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: add.w $t0, $t0, $t2
-; LA32-NEXT: mulh.wu $t1, $a3, $a5
-; LA32-NEXT: add.w $t1, $t1, $t4
-; LA32-NEXT: mul.w $a4, $a4, $a5
-; LA32-NEXT: add.w $a4, $t1, $a4
-; LA32-NEXT: mul.w $a2, $a6, $a2
-; LA32-NEXT: mulh.wu $a5, $a6, $a1
-; LA32-NEXT: add.w $a2, $a5, $a2
-; LA32-NEXT: add.w $a2, $a2, $t5
-; LA32-NEXT: add.w $a2, $a2, $a4
-; LA32-NEXT: sltu $a4, $t6, $t5
-; LA32-NEXT: add.w $a2, $a2, $a4
-; LA32-NEXT: add.w $a2, $t0, $a2
-; LA32-NEXT: add.w $a2, $a2, $t8
-; LA32-NEXT: mul.w $a1, $a1, $a3
+; LA32-NEXT: vmulwod.q.d $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 4
-; LA32-NEXT: st.w $t7, $a0, 8
-; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwod_q_d:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 8
-; LA64-NEXT: ld.d $a2, $a2, 8
-; LA64-NEXT: mul.d $a3, $a1, $a2
-; LA64-NEXT: mulh.d $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 8
-; LA64-NEXT: st.d $a3, $a0, 0
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwod.q.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
@@ -339,44 +257,23 @@ define void @vmulwev_q_du(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vpickve2gr.w $a1, $vr0, 1
-; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 1
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 0
-; LA32-NEXT: mulh.wu $a5, $a2, $a4
-; LA32-NEXT: mul.w $a6, $a1, $a4
-; LA32-NEXT: add.w $a5, $a6, $a5
-; LA32-NEXT: sltu $a6, $a5, $a6
-; LA32-NEXT: mulh.wu $a7, $a1, $a4
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: mul.w $a7, $a2, $a3
-; LA32-NEXT: add.w $a5, $a7, $a5
-; LA32-NEXT: sltu $a7, $a5, $a7
-; LA32-NEXT: mulh.wu $t0, $a2, $a3
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: add.w $a7, $a6, $a7
-; LA32-NEXT: mul.w $t0, $a1, $a3
-; LA32-NEXT: add.w $t1, $t0, $a7
-; LA32-NEXT: sltu $t0, $t1, $t0
-; LA32-NEXT: sltu $a6, $a7, $a6
-; LA32-NEXT: mulh.wu $a1, $a1, $a3
-; LA32-NEXT: add.w $a1, $a1, $a6
-; LA32-NEXT: add.w $a1, $a1, $t0
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a5, $a0, 4
-; LA32-NEXT: st.w $t1, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 12
+; LA32-NEXT: vmulwev.q.du $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwev_q_du:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 0
-; LA64-NEXT: ld.d $a2, $a2, 0
-; LA64-NEXT: mul.d $a3, $a1, $a2
-; LA64-NEXT: mulh.du $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 8
-; LA64-NEXT: st.d $a3, $a0, 0
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwev.q.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
@@ -455,44 +352,23 @@ define void @vmulwod_q_du(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vpickve2gr.w $a1, $vr0, 3
-; LA32-NEXT: vpickve2gr.w $a2, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 3
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 2
-; LA32-NEXT: mulh.wu $a5, $a2, $a4
-; LA32-NEXT: mul.w $a6, $a1, $a4
-; LA32-NEXT: add.w $a5, $a6, $a5
-; LA32-NEXT: sltu $a6, $a5, $a6
-; LA32-NEXT: mulh.wu $a7, $a1, $a4
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: mul.w $a7, $a2, $a3
-; LA32-NEXT: add.w $a5, $a7, $a5
-; LA32-NEXT: sltu $a7, $a5, $a7
-; LA32-NEXT: mulh.wu $t0, $a2, $a3
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: add.w $a7, $a6, $a7
-; LA32-NEXT: mul.w $t0, $a1, $a3
-; LA32-NEXT: add.w $t1, $t0, $a7
-; LA32-NEXT: sltu $t0, $t1, $t0
-; LA32-NEXT: sltu $a6, $a7, $a6
-; LA32-NEXT: mulh.wu $a1, $a1, $a3
-; LA32-NEXT: add.w $a1, $a1, $a6
-; LA32-NEXT: add.w $a1, $a1, $t0
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a5, $a0, 4
-; LA32-NEXT: st.w $t1, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 12
+; LA32-NEXT: vmulwod.q.du $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwod_q_du:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 8
-; LA64-NEXT: ld.d $a2, $a2, 8
-; LA64-NEXT: mul.d $a3, $a1, $a2
-; LA64-NEXT: mulh.du $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 8
-; LA64-NEXT: st.d $a3, $a0, 0
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwod.q.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
@@ -571,57 +447,23 @@ define void @vmulwev_q_du_d(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vpickve2gr.w $a1, $vr0, 1
-; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 0
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 1
-; LA32-NEXT: srai.w $a5, $a4, 31
-; LA32-NEXT: mulh.wu $a6, $a2, $a3
-; LA32-NEXT: mul.w $a7, $a1, $a3
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: sltu $a7, $a6, $a7
-; LA32-NEXT: mulh.wu $t0, $a1, $a3
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: mul.w $t0, $a2, $a4
-; LA32-NEXT: add.w $a6, $t0, $a6
-; LA32-NEXT: sltu $t0, $a6, $t0
-; LA32-NEXT: mulh.wu $t1, $a2, $a4
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: add.w $t0, $a7, $t0
-; LA32-NEXT: mul.w $t1, $a1, $a4
-; LA32-NEXT: add.w $t2, $t1, $t0
-; LA32-NEXT: mul.w $t3, $a5, $a2
-; LA32-NEXT: add.w $t4, $t2, $t3
-; LA32-NEXT: sltu $t5, $t4, $t2
-; LA32-NEXT: sltu $t1, $t2, $t1
-; LA32-NEXT: sltu $a7, $t0, $a7
-; LA32-NEXT: mulh.wu $a4, $a1, $a4
-; LA32-NEXT: add.w $a4, $a4, $a7
-; LA32-NEXT: add.w $a4, $a4, $t1
-; LA32-NEXT: mul.w $a1, $a5, $a1
-; LA32-NEXT: mulh.wu $a5, $a5, $a2
-; LA32-NEXT: add.w $a1, $a5, $a1
-; LA32-NEXT: add.w $a1, $a1, $t3
-; LA32-NEXT: add.w $a1, $a4, $a1
-; LA32-NEXT: add.w $a1, $a1, $t5
-; LA32-NEXT: mul.w $a2, $a2, $a3
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $t4, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 12
+; LA32-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwev_q_du_d:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a2, $a2, 0
-; LA64-NEXT: ld.d $a1, $a1, 0
-; LA64-NEXT: srai.d $a3, $a2, 63
-; LA64-NEXT: mulh.du $a4, $a1, $a2
-; LA64-NEXT: mul.d $a3, $a1, $a3
-; LA64-NEXT: add.d $a3, $a4, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a3, $a0, 8
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
@@ -700,57 +542,23 @@ define void @vmulwod_q_du_d(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vpickve2gr.w $a1, $vr0, 3
-; LA32-NEXT: vpickve2gr.w $a2, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 2
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 3
-; LA32-NEXT: srai.w $a5, $a4, 31
-; LA32-NEXT: mulh.wu $a6, $a2, $a3
-; LA32-NEXT: mul.w $a7, $a1, $a3
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: sltu $a7, $a6, $a7
-; LA32-NEXT: mulh.wu $t0, $a1, $a3
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: mul.w $t0, $a2, $a4
-; LA32-NEXT: add.w $a6, $t0, $a6
-; LA32-NEXT: sltu $t0, $a6, $t0
-; LA32-NEXT: mulh.wu $t1, $a2, $a4
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: add.w $t0, $a7, $t0
-; LA32-NEXT: mul.w $t1, $a1, $a4
-; LA32-NEXT: add.w $t2, $t1, $t0
-; LA32-NEXT: mul.w $t3, $a5, $a2
-; LA32-NEXT: add.w $t4, $t2, $t3
-; LA32-NEXT: sltu $t5, $t4, $t2
-; LA32-NEXT: sltu $t1, $t2, $t1
-; LA32-NEXT: sltu $a7, $t0, $a7
-; LA32-NEXT: mulh.wu $a4, $a1, $a4
-; LA32-NEXT: add.w $a4, $a4, $a7
-; LA32-NEXT: add.w $a4, $a4, $t1
-; LA32-NEXT: mul.w $a1, $a5, $a1
-; LA32-NEXT: mulh.wu $a5, $a5, $a2
-; LA32-NEXT: add.w $a1, $a5, $a1
-; LA32-NEXT: add.w $a1, $a1, $t3
-; LA32-NEXT: add.w $a1, $a4, $a1
-; LA32-NEXT: add.w $a1, $a1, $t5
-; LA32-NEXT: mul.w $a2, $a2, $a3
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $t4, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 12
+; LA32-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwod_q_du_d:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a2, $a2, 8
-; LA64-NEXT: ld.d $a1, $a1, 8
-; LA64-NEXT: srai.d $a3, $a2, 63
-; LA64-NEXT: mulh.du $a4, $a1, $a2
-; LA64-NEXT: mul.d $a3, $a1, $a3
-; LA64-NEXT: add.d $a3, $a4, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a3, $a0, 8
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
@@ -829,57 +637,23 @@ define void @vmulwev_q_du_d_1(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vmulwev.q.du.d $vr0, $vr1, $vr0
; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 1
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 0
-; LA32-NEXT: srai.w $a5, $a2, 31
-; LA32-NEXT: mulh.wu $a6, $a1, $a4
-; LA32-NEXT: mul.w $a7, $a2, $a4
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: sltu $a7, $a6, $a7
-; LA32-NEXT: mulh.wu $t0, $a2, $a4
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: mul.w $t0, $a1, $a3
-; LA32-NEXT: add.w $a6, $t0, $a6
-; LA32-NEXT: sltu $t0, $a6, $t0
-; LA32-NEXT: mulh.wu $t1, $a1, $a3
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: add.w $t0, $a7, $t0
-; LA32-NEXT: mul.w $t1, $a2, $a3
-; LA32-NEXT: add.w $t2, $t1, $t0
-; LA32-NEXT: mul.w $t3, $a4, $a5
-; LA32-NEXT: add.w $t4, $t2, $t3
-; LA32-NEXT: sltu $t5, $t4, $t2
-; LA32-NEXT: sltu $t1, $t2, $t1
-; LA32-NEXT: sltu $a7, $t0, $a7
-; LA32-NEXT: mulh.wu $a2, $a2, $a3
-; LA32-NEXT: add.w $a2, $a2, $a7
-; LA32-NEXT: add.w $a2, $a2, $t1
-; LA32-NEXT: mulh.wu $a7, $a4, $a5
-; LA32-NEXT: add.w $a7, $a7, $t3
-; LA32-NEXT: mul.w $a3, $a3, $a5
-; LA32-NEXT: add.w $a3, $a7, $a3
-; LA32-NEXT: add.w $a2, $a2, $a3
-; LA32-NEXT: add.w $a2, $a2, $t5
-; LA32-NEXT: mul.w $a1, $a1, $a4
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $t4, $a0, 8
-; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwev_q_du_d_1:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 0
-; LA64-NEXT: ld.d $a2, $a2, 0
-; LA64-NEXT: srai.d $a3, $a1, 63
-; LA64-NEXT: mulh.du $a4, $a1, $a2
-; LA64-NEXT: mul.d $a3, $a3, $a2
-; LA64-NEXT: add.d $a3, $a4, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a3, $a0, 8
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwev.q.du.d $vr0, $vr1, $vr0
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
@@ -958,57 +732,23 @@ define void @vmulwod_q_du_d_1(ptr %res, ptr %a, ptr %b) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vpickve2gr.w $a1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3
-; LA32-NEXT: vpickve2gr.w $a3, $vr1, 3
-; LA32-NEXT: vpickve2gr.w $a4, $vr1, 2
-; LA32-NEXT: srai.w $a5, $a2, 31
-; LA32-NEXT: mulh.wu $a6, $a1, $a4
-; LA32-NEXT: mul.w $a7, $a2, $a4
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: sltu $a7, $a6, $a7
-; LA32-NEXT: mulh.wu $t0, $a2, $a4
-; LA32-NEXT: add.w $a7, $t0, $a7
-; LA32-NEXT: mul.w $t0, $a1, $a3
-; LA32-NEXT: add.w $a6, $t0, $a6
-; LA32-NEXT: sltu $t0, $a6, $t0
-; LA32-NEXT: mulh.wu $t1, $a1, $a3
-; LA32-NEXT: add.w $t0, $t1, $t0
-; LA32-NEXT: add.w $t0, $a7, $t0
-; LA32-NEXT: mul.w $t1, $a2, $a3
-; LA32-NEXT: add.w $t2, $t1, $t0
-; LA32-NEXT: mul.w $t3, $a4, $a5
-; LA32-NEXT: add.w $t4, $t2, $t3
-; LA32-NEXT: sltu $t5, $t4, $t2
-; LA32-NEXT: sltu $t1, $t2, $t1
-; LA32-NEXT: sltu $a7, $t0, $a7
-; LA32-NEXT: mulh.wu $a2, $a2, $a3
-; LA32-NEXT: add.w $a2, $a2, $a7
-; LA32-NEXT: add.w $a2, $a2, $t1
-; LA32-NEXT: mulh.wu $a7, $a4, $a5
-; LA32-NEXT: add.w $a7, $a7, $t3
-; LA32-NEXT: mul.w $a3, $a3, $a5
-; LA32-NEXT: add.w $a3, $a7, $a3
-; LA32-NEXT: add.w $a2, $a2, $a3
-; LA32-NEXT: add.w $a2, $a2, $t5
-; LA32-NEXT: mul.w $a1, $a1, $a4
+; LA32-NEXT: vmulwod.q.du.d $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: st.w $a4, $a0, 12
+; LA32-NEXT: st.w $a3, $a0, 8
+; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $t4, $a0, 8
-; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ret
;
; LA64-LABEL: vmulwod_q_du_d_1:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 8
-; LA64-NEXT: ld.d $a2, $a2, 8
-; LA64-NEXT: srai.d $a3, $a1, 63
-; LA64-NEXT: mulh.du $a4, $a1, $a2
-; LA64-NEXT: mul.d $a3, $a3, $a2
-; LA64-NEXT: add.d $a3, $a4, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a2
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a3, $a0, 8
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vmulwod.q.du.d $vr0, $vr1, $vr0
+; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
>From f3e91f4976d0f96de0f0b8a24cdb2be67e77df75 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Thu, 9 Oct 2025 17:21:23 +0800
Subject: [PATCH 3/3] including v2i128
---
.../LoongArch/LoongArchISelLowering.cpp | 17 +-
.../lasx/ir-instruction/mulwev_od.ll | 928 ++----------------
2 files changed, 71 insertions(+), 874 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 032032874cd11..9952cfaefee57 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6681,7 +6681,8 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
}
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
if (!DCI.isBeforeLegalize())
return SDValue();
@@ -6690,9 +6691,17 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ // Note: v2i128 is an unsupported MVT vector type (see
+ // MachineValueType.h::getVectorVT()), use NumElements and SizeInBits to
+ // identify it.
+ bool HasLSXOnly = Subtarget.hasExtLSX() && !Subtarget.hasExtLASX();
+ bool Isv2i128 = ResTy.isVector() && ResTy.getVectorNumElements() == 2 &&
+ ResTy.getScalarSizeInBits() == 128;
if (ResTy != MVT::v8i16 && ResTy != MVT::v4i32 && ResTy != MVT::v2i64 &&
- ResTy != MVT::v16i16 && ResTy != MVT::v8i32 && ResTy != MVT::v4i64 &&
- ResTy != MVT::i128)
+ ResTy != MVT::i128 && ResTy != MVT::v16i16 && ResTy != MVT::v8i32 &&
+ ResTy != MVT::v4i64 && !Isv2i128)
+ return SDValue();
+ if (HasLSXOnly && (ResTy.is256BitVector() || Isv2i128))
return SDValue();
// Combine:
@@ -6837,7 +6846,7 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
- return performMULCombine(N, DAG, DCI);
+ return performMULCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
index 605325f4dc4f4..ed3a31d12ee83 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_b:
@@ -63,139 +63,13 @@ entry:
}
define void @vmulwev_q_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $fp, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 24 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s1, $sp, 20 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s2, $sp, 16 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s3, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s4, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $t3, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a5, $xr0, 5
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 0
-; LA32-NEXT: xvpickve2gr.w $t4, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 5
-; LA32-NEXT: srai.w $t1, $a5, 31
-; LA32-NEXT: srai.w $t5, $t3, 31
-; LA32-NEXT: srai.w $t0, $a7, 31
-; LA32-NEXT: srai.w $t6, $t4, 31
-; LA32-NEXT: mulh.wu $a6, $a2, $a4
-; LA32-NEXT: mul.w $t2, $t3, $a4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t7, $t3, $a4
-; LA32-NEXT: add.w $t7, $t7, $t2
-; LA32-NEXT: mul.w $t2, $a2, $t4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t8, $a2, $t4
-; LA32-NEXT: add.w $t2, $t8, $t2
-; LA32-NEXT: add.w $t8, $t7, $t2
-; LA32-NEXT: mul.w $fp, $t3, $t4
-; LA32-NEXT: add.w $s0, $fp, $t8
-; LA32-NEXT: mul.w $s1, $a4, $t5
-; LA32-NEXT: mul.w $s2, $t6, $a2
-; LA32-NEXT: add.w $s3, $s2, $s1
-; LA32-NEXT: add.w $t2, $s0, $s3
-; LA32-NEXT: sltu $s4, $t2, $s0
-; LA32-NEXT: sltu $fp, $s0, $fp
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: mulh.wu $t8, $t3, $t4
-; LA32-NEXT: add.w $t7, $t8, $t7
-; LA32-NEXT: add.w $t7, $t7, $fp
-; LA32-NEXT: mulh.wu $t8, $a4, $t5
-; LA32-NEXT: add.w $t8, $t8, $s1
-; LA32-NEXT: mul.w $t4, $t4, $t5
-; LA32-NEXT: add.w $t4, $t8, $t4
-; LA32-NEXT: mul.w $t3, $t6, $t3
-; LA32-NEXT: mulh.wu $t5, $t6, $a2
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: add.w $t3, $t3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: sltu $t4, $s3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: add.w $t3, $t7, $t3
-; LA32-NEXT: add.w $t3, $t3, $s4
-; LA32-NEXT: mulh.wu $t4, $a1, $a3
-; LA32-NEXT: mul.w $t5, $a5, $a3
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: sltu $t5, $t4, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a3
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: mul.w $t6, $a1, $a7
-; LA32-NEXT: add.w $t4, $t6, $t4
-; LA32-NEXT: sltu $t6, $t4, $t6
-; LA32-NEXT: mulh.wu $t7, $a1, $a7
-; LA32-NEXT: add.w $t6, $t7, $t6
-; LA32-NEXT: add.w $t6, $t5, $t6
-; LA32-NEXT: mul.w $t7, $a5, $a7
-; LA32-NEXT: add.w $t8, $t7, $t6
-; LA32-NEXT: mul.w $fp, $a3, $t1
-; LA32-NEXT: mul.w $s0, $t0, $a1
-; LA32-NEXT: add.w $s1, $s0, $fp
-; LA32-NEXT: add.w $s2, $t8, $s1
-; LA32-NEXT: sltu $s3, $s2, $t8
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t5, $t7
-; LA32-NEXT: mulh.wu $t6, $a3, $t1
-; LA32-NEXT: add.w $t6, $t6, $fp
-; LA32-NEXT: mul.w $a7, $a7, $t1
-; LA32-NEXT: add.w $a7, $t6, $a7
-; LA32-NEXT: mul.w $a5, $t0, $a5
-; LA32-NEXT: mulh.wu $t0, $t0, $a1
-; LA32-NEXT: add.w $a5, $t0, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: sltu $a7, $s1, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: add.w $a5, $t5, $a5
-; LA32-NEXT: add.w $a5, $a5, $s3
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t4, $a0, 20
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $s2, $a0, 24
-; LA32-NEXT: st.w $t2, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $t3, $a0, 12
-; LA32-NEXT: ld.w $s4, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s3, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s2, $sp, 16 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s1, $sp, 20 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s0, $sp, 24 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 2
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 0
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.d $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.d $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -269,139 +143,13 @@ entry:
}
define void @vmulwod_q_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $fp, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 24 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s1, $sp, 20 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s2, $sp, 16 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s3, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s4, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $t3, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a5, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 2
-; LA32-NEXT: xvpickve2gr.w $t4, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA32-NEXT: srai.w $t1, $a5, 31
-; LA32-NEXT: srai.w $t5, $t3, 31
-; LA32-NEXT: srai.w $t0, $a7, 31
-; LA32-NEXT: srai.w $t6, $t4, 31
-; LA32-NEXT: mulh.wu $a6, $a2, $a4
-; LA32-NEXT: mul.w $t2, $t3, $a4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t7, $t3, $a4
-; LA32-NEXT: add.w $t7, $t7, $t2
-; LA32-NEXT: mul.w $t2, $a2, $t4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t8, $a2, $t4
-; LA32-NEXT: add.w $t2, $t8, $t2
-; LA32-NEXT: add.w $t8, $t7, $t2
-; LA32-NEXT: mul.w $fp, $t3, $t4
-; LA32-NEXT: add.w $s0, $fp, $t8
-; LA32-NEXT: mul.w $s1, $a4, $t5
-; LA32-NEXT: mul.w $s2, $t6, $a2
-; LA32-NEXT: add.w $s3, $s2, $s1
-; LA32-NEXT: add.w $t2, $s0, $s3
-; LA32-NEXT: sltu $s4, $t2, $s0
-; LA32-NEXT: sltu $fp, $s0, $fp
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: mulh.wu $t8, $t3, $t4
-; LA32-NEXT: add.w $t7, $t8, $t7
-; LA32-NEXT: add.w $t7, $t7, $fp
-; LA32-NEXT: mulh.wu $t8, $a4, $t5
-; LA32-NEXT: add.w $t8, $t8, $s1
-; LA32-NEXT: mul.w $t4, $t4, $t5
-; LA32-NEXT: add.w $t4, $t8, $t4
-; LA32-NEXT: mul.w $t3, $t6, $t3
-; LA32-NEXT: mulh.wu $t5, $t6, $a2
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: add.w $t3, $t3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: sltu $t4, $s3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: add.w $t3, $t7, $t3
-; LA32-NEXT: add.w $t3, $t3, $s4
-; LA32-NEXT: mulh.wu $t4, $a1, $a3
-; LA32-NEXT: mul.w $t5, $a5, $a3
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: sltu $t5, $t4, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a3
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: mul.w $t6, $a1, $a7
-; LA32-NEXT: add.w $t4, $t6, $t4
-; LA32-NEXT: sltu $t6, $t4, $t6
-; LA32-NEXT: mulh.wu $t7, $a1, $a7
-; LA32-NEXT: add.w $t6, $t7, $t6
-; LA32-NEXT: add.w $t6, $t5, $t6
-; LA32-NEXT: mul.w $t7, $a5, $a7
-; LA32-NEXT: add.w $t8, $t7, $t6
-; LA32-NEXT: mul.w $fp, $a3, $t1
-; LA32-NEXT: mul.w $s0, $t0, $a1
-; LA32-NEXT: add.w $s1, $s0, $fp
-; LA32-NEXT: add.w $s2, $t8, $s1
-; LA32-NEXT: sltu $s3, $s2, $t8
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t5, $t7
-; LA32-NEXT: mulh.wu $t6, $a3, $t1
-; LA32-NEXT: add.w $t6, $t6, $fp
-; LA32-NEXT: mul.w $a7, $a7, $t1
-; LA32-NEXT: add.w $a7, $t6, $a7
-; LA32-NEXT: mul.w $a5, $t0, $a5
-; LA32-NEXT: mulh.wu $t0, $t0, $a1
-; LA32-NEXT: add.w $a5, $t0, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: sltu $a7, $s1, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: add.w $a5, $t5, $a5
-; LA32-NEXT: add.w $a5, $a5, $s3
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t4, $a0, 20
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $s2, $a0, 24
-; LA32-NEXT: st.w $t2, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $t3, $a0, 12
-; LA32-NEXT: ld.w $s4, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s3, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s2, $sp, 16 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s1, $sp, 20 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s0, $sp, 24 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 3
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 1
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.d $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.d $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -475,85 +223,13 @@ entry:
}
define void @vmulwev_q_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 5
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 0
-; LA32-NEXT: mulh.wu $t1, $a4, $t0
-; LA32-NEXT: mul.w $t2, $a3, $t0
-; LA32-NEXT: add.w $t1, $t2, $t1
-; LA32-NEXT: sltu $t2, $t1, $t2
-; LA32-NEXT: mulh.wu $t3, $a3, $t0
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a4, $a7
-; LA32-NEXT: add.w $t1, $t3, $t1
-; LA32-NEXT: sltu $t3, $t1, $t3
-; LA32-NEXT: mulh.wu $t4, $a4, $a7
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a3, $a7
-; LA32-NEXT: add.w $t5, $t4, $t3
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a3, $a3, $a7
-; LA32-NEXT: add.w $a3, $a3, $t2
-; LA32-NEXT: add.w $a3, $a3, $t4
-; LA32-NEXT: mulh.wu $a7, $a2, $a6
-; LA32-NEXT: mul.w $t2, $a1, $a6
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t3, $a1, $a6
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a2, $a5
-; LA32-NEXT: add.w $a7, $t3, $a7
-; LA32-NEXT: sltu $t3, $a7, $t3
-; LA32-NEXT: mulh.wu $t4, $a2, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t6, $t4, $t3
-; LA32-NEXT: sltu $t4, $t6, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a1, $a1, $a5
-; LA32-NEXT: add.w $a1, $a1, $t2
-; LA32-NEXT: add.w $a1, $a1, $t4
-; LA32-NEXT: mul.w $a4, $a4, $t0
-; LA32-NEXT: mul.w $a2, $a2, $a6
-; LA32-NEXT: st.w $a2, $a0, 16
-; LA32-NEXT: st.w $a4, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t1, $a0, 4
-; LA32-NEXT: st.w $t6, $a0, 24
-; LA32-NEXT: st.w $t5, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 28
-; LA32-NEXT: st.w $a3, $a0, 12
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 2
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 0
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.du $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.du $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -627,85 +303,13 @@ entry:
}
define void @vmulwod_q_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 7
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 2
-; LA32-NEXT: mulh.wu $t1, $a4, $t0
-; LA32-NEXT: mul.w $t2, $a3, $t0
-; LA32-NEXT: add.w $t1, $t2, $t1
-; LA32-NEXT: sltu $t2, $t1, $t2
-; LA32-NEXT: mulh.wu $t3, $a3, $t0
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a4, $a7
-; LA32-NEXT: add.w $t1, $t3, $t1
-; LA32-NEXT: sltu $t3, $t1, $t3
-; LA32-NEXT: mulh.wu $t4, $a4, $a7
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a3, $a7
-; LA32-NEXT: add.w $t5, $t4, $t3
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a3, $a3, $a7
-; LA32-NEXT: add.w $a3, $a3, $t2
-; LA32-NEXT: add.w $a3, $a3, $t4
-; LA32-NEXT: mulh.wu $a7, $a2, $a6
-; LA32-NEXT: mul.w $t2, $a1, $a6
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t3, $a1, $a6
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a2, $a5
-; LA32-NEXT: add.w $a7, $t3, $a7
-; LA32-NEXT: sltu $t3, $a7, $t3
-; LA32-NEXT: mulh.wu $t4, $a2, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t6, $t4, $t3
-; LA32-NEXT: sltu $t4, $t6, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a1, $a1, $a5
-; LA32-NEXT: add.w $a1, $a1, $t2
-; LA32-NEXT: add.w $a1, $a1, $t4
-; LA32-NEXT: mul.w $a4, $a4, $t0
-; LA32-NEXT: mul.w $a2, $a2, $a6
-; LA32-NEXT: st.w $a2, $a0, 16
-; LA32-NEXT: st.w $a4, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t1, $a0, 4
-; LA32-NEXT: st.w $t6, $a0, 24
-; LA32-NEXT: st.w $t5, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 28
-; LA32-NEXT: st.w $a3, $a0, 12
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 3
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 1
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.du $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.du $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -779,117 +383,13 @@ entry:
}
define void @vmulwev_q_du_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_du_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 5
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 0
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA32-NEXT: srai.w $t1, $t0, 31
-; LA32-NEXT: srai.w $t2, $a7, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a5
-; LA32-NEXT: mul.w $t4, $a6, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $a7
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $a7
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $t2, $a2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a7, $a6, $a7
-; LA32-NEXT: add.w $a7, $a7, $t4
-; LA32-NEXT: add.w $a7, $a7, $t6
-; LA32-NEXT: mul.w $a6, $t2, $a6
-; LA32-NEXT: mulh.wu $t2, $t2, $a2
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: add.w $a6, $a6, $t8
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $a7, $a1, $a4
-; LA32-NEXT: mul.w $t2, $a3, $a4
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t4, $a3, $a4
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $t0
-; LA32-NEXT: add.w $a7, $t4, $a7
-; LA32-NEXT: sltu $t4, $a7, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $t0
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a3, $t0
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $t1, $a1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $t0, $a3, $t0
-; LA32-NEXT: add.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t0, $t5
-; LA32-NEXT: mul.w $a3, $t1, $a3
-; LA32-NEXT: mulh.wu $t1, $t1, $a1
-; LA32-NEXT: add.w $a3, $t1, $a3
-; LA32-NEXT: add.w $a3, $a3, $t7
-; LA32-NEXT: add.w $a3, $t0, $a3
-; LA32-NEXT: add.w $a3, $a3, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a5
-; LA32-NEXT: mul.w $a1, $a1, $a4
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_du_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 0
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 2
-; LA64-NEXT: srai.d $a5, $a4, 63
-; LA64-NEXT: srai.d $a6, $a3, 63
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a6, $a2, $a6
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a5, $a1, $a5
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: st.d $a1, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_du_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -963,117 +463,13 @@ entry:
}
define void @vmulwod_q_du_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_du_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 7
-; LA32-NEXT: srai.w $t1, $t0, 31
-; LA32-NEXT: srai.w $t2, $a7, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a5
-; LA32-NEXT: mul.w $t4, $a6, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $a7
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $a7
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $t2, $a2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a7, $a6, $a7
-; LA32-NEXT: add.w $a7, $a7, $t4
-; LA32-NEXT: add.w $a7, $a7, $t6
-; LA32-NEXT: mul.w $a6, $t2, $a6
-; LA32-NEXT: mulh.wu $t2, $t2, $a2
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: add.w $a6, $a6, $t8
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $a7, $a1, $a4
-; LA32-NEXT: mul.w $t2, $a3, $a4
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t4, $a3, $a4
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $t0
-; LA32-NEXT: add.w $a7, $t4, $a7
-; LA32-NEXT: sltu $t4, $a7, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $t0
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a3, $t0
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $t1, $a1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $t0, $a3, $t0
-; LA32-NEXT: add.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t0, $t5
-; LA32-NEXT: mul.w $a3, $t1, $a3
-; LA32-NEXT: mulh.wu $t1, $t1, $a1
-; LA32-NEXT: add.w $a3, $t1, $a3
-; LA32-NEXT: add.w $a3, $a3, $t7
-; LA32-NEXT: add.w $a3, $t0, $a3
-; LA32-NEXT: add.w $a3, $a3, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a5
-; LA32-NEXT: mul.w $a1, $a1, $a4
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_du_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 1
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 3
-; LA64-NEXT: srai.d $a5, $a4, 63
-; LA64-NEXT: srai.d $a6, $a3, 63
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a6, $a2, $a6
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a5, $a1, $a5
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: st.d $a1, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_du_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -1147,117 +543,13 @@ entry:
}
define void @vmulwev_q_du_d_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_du_d_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a7, $xr0, 5
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 5
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 0
-; LA32-NEXT: srai.w $t1, $a7, 31
-; LA32-NEXT: srai.w $t2, $a6, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a4
-; LA32-NEXT: mul.w $t4, $a6, $a4
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a4
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $t0
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $t0
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $t0
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $a4, $t2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $t4
-; LA32-NEXT: add.w $a6, $a6, $t6
-; LA32-NEXT: mulh.wu $t4, $a4, $t2
-; LA32-NEXT: add.w $t4, $t4, $t8
-; LA32-NEXT: mul.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: add.w $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $t0, $a1, $a3
-; LA32-NEXT: mul.w $t2, $a7, $a3
-; LA32-NEXT: add.w $t0, $t2, $t0
-; LA32-NEXT: sltu $t2, $t0, $t2
-; LA32-NEXT: mulh.wu $t4, $a7, $a3
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: sltu $t4, $t0, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a7, $a5
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $a3, $t1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $a7, $a7, $a5
-; LA32-NEXT: add.w $a7, $a7, $t2
-; LA32-NEXT: add.w $a7, $a7, $t5
-; LA32-NEXT: mulh.wu $t2, $a3, $t1
-; LA32-NEXT: add.w $t2, $t2, $t7
-; LA32-NEXT: mul.w $a5, $a5, $t1
-; LA32-NEXT: add.w $a5, $t2, $a5
-; LA32-NEXT: add.w $a5, $a7, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t0, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_du_d_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 2
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 0
-; LA64-NEXT: srai.d $a5, $a2, 63
-; LA64-NEXT: srai.d $a6, $a1, 63
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a6, $a6, $a4
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a5, $a5, $a3
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: st.d $a2, $a0, 16
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_du_d_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -1331,117 +623,13 @@ entry:
}
define void @vmulwod_q_du_d_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_du_d_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a7, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 7
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 2
-; LA32-NEXT: srai.w $t1, $a7, 31
-; LA32-NEXT: srai.w $t2, $a6, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a4
-; LA32-NEXT: mul.w $t4, $a6, $a4
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a4
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $t0
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $t0
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $t0
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $a4, $t2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $t4
-; LA32-NEXT: add.w $a6, $a6, $t6
-; LA32-NEXT: mulh.wu $t4, $a4, $t2
-; LA32-NEXT: add.w $t4, $t4, $t8
-; LA32-NEXT: mul.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: add.w $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $t0, $a1, $a3
-; LA32-NEXT: mul.w $t2, $a7, $a3
-; LA32-NEXT: add.w $t0, $t2, $t0
-; LA32-NEXT: sltu $t2, $t0, $t2
-; LA32-NEXT: mulh.wu $t4, $a7, $a3
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: sltu $t4, $t0, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a7, $a5
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $a3, $t1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $a7, $a7, $a5
-; LA32-NEXT: add.w $a7, $a7, $t2
-; LA32-NEXT: add.w $a7, $a7, $t5
-; LA32-NEXT: mulh.wu $t2, $a3, $t1
-; LA32-NEXT: add.w $t2, $t2, $t7
-; LA32-NEXT: mul.w $a5, $a5, $t1
-; LA32-NEXT: add.w $a5, $t2, $a5
-; LA32-NEXT: add.w $a5, $a7, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t0, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_du_d_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 3
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 1
-; LA64-NEXT: srai.d $a5, $a2, 63
-; LA64-NEXT: srai.d $a6, $a1, 63
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a6, $a6, $a4
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a5, $a5, $a3
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: st.d $a2, $a0, 16
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_du_d_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
More information about the llvm-branch-commits
mailing list