[llvm-branch-commits] [llvm] [LoongArch] Perform DAG combine for MUL to generate `[x]vmulw{ev/od}` (PR #161368)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Sep 30 06:06:07 PDT 2025
https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/161368
None
>From 50ba8563430a2ad52dc43fbf2eef11757ae041a1 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 30 Sep 2025 20:53:06 +0800
Subject: [PATCH] [LoongArch] Perform DAG combine for MUL to generate
`[x]vmulw{ev/od}`
---
.../LoongArch/LoongArchISelLowering.cpp | 118 +
.../Target/LoongArch/LoongArchISelLowering.h | 11 +-
.../LoongArch/LoongArchLASXInstrInfo.td | 41 +
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 48 +
.../lasx/ir-instruction/mulwev_od.ll | 2268 +----------------
.../LoongArch/lsx/ir-instruction/mulwev_od.ll | 186 +-
6 files changed, 373 insertions(+), 2299 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4cfbfca45d359..edef58338c019 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -462,6 +462,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasExtLSX()) {
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BITCAST);
+ setTargetDAGCombine(ISD::MUL);
}
// Set DAG combine for 'LASX' feature.
@@ -6680,6 +6681,115 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Try to fold a MUL of two sign/zero-extended BUILD_VECTORs that gather the
+// even (or odd) lanes of two vectors into one [X]VMULW{EV/OD}[U/US] node.
+// Returns an empty SDValue when the pattern does not match.
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT ResTy = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (ResTy != MVT::v8i16 && ResTy != MVT::v4i32 && ResTy != MVT::v2i64 &&
+      ResTy != MVT::v16i16 && ResTy != MVT::v8i32 &&
+      ResTy != MVT::v4i64) // && ResTy != MVT::v2i128)
+    return SDValue();
+
+  // Combine:
+  //   ti,tii,...,tx = extract_vector_elt t0, {0,2,4,.../1,3,5,...}
+  //   tj,tjj,...,ty = extract_vector_elt t1, {0,2,4,.../1,3,5,...}
+  //   tm = BUILD_VECTOR ti,tii,...,tx
+  //   tn = BUILD_VECTOR tj,tjj,...,ty
+  //   ta = {sign/zero}_extend tm
+  //   tb = {sign/zero}_extend tn
+  //   tr = mul ta, tb
+  // to:
+  //   tr = VMULW{EV/OD}[U/US] t0, t1
+
+  // Bail out on non-extensions before reading operand 0 or indexing OpcTable.
+  auto IsExtend = [](unsigned Opc) {
+    return Opc == ISD::SIGN_EXTEND || Opc == ISD::ZERO_EXTEND;
+  };
+  unsigned Ext0 = N0.getOpcode();
+  unsigned Ext1 = N1.getOpcode();
+  if (!IsExtend(Ext0) || !IsExtend(Ext1))
+    return SDValue();
+
+  SDValue BV0 = N0.getOperand(0);
+  SDValue BV1 = N1.getOperand(0);
+  if (BV0.getOpcode() != ISD::BUILD_VECTOR ||
+      BV1.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  unsigned ResBits = ResTy.getScalarType().getSizeInBits();
+  unsigned BV0Bits = BV0.getValueType().getScalarType().getSizeInBits();
+  unsigned BV1Bits = BV1.getValueType().getScalarType().getSizeInBits();
+  if (BV0Bits != BV1Bits || ResBits != BV0Bits * 2)
+    return SDValue();
+
+  unsigned Index = 0;
+  SDValue OrigN0, OrigN1;
+  for (unsigned i = 0; i < BV0.getNumOperands(); ++i) {
+    SDValue Op0 = BV0.getOperand(i);
+    SDValue Op1 = BV1.getOperand(i);
+    // Each element of BUILD_VECTOR must be EXTRACT_VECTOR_ELT.
+    if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    // Check each EXTRACT_VECTOR_ELT's source vector and index.
+    if (Op0.getOperand(1) != Op1.getOperand(1))
+      return SDValue();
+
+    auto *IdxC = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+    if (!IdxC)
+      return SDValue();
+    unsigned CurIdx = IdxC->getZExtValue();
+
+    if (i == 0) {
+      if (CurIdx != 0 && CurIdx != 1)
+        return SDValue();
+      OrigN0 = Op0.getOperand(0);
+      OrigN1 = Op1.getOperand(0);
+    } else {
+      if (CurIdx != Index + 2)
+        return SDValue();
+      if (Op0.getOperand(0) != OrigN0 || Op1.getOperand(0) != OrigN1)
+        return SDValue();
+    }
+    Index = CurIdx;
+  }
+
+  EVT OrigTy = OrigN0.getValueType();
+  if (OrigTy != OrigN1.getValueType())
+    return SDValue();
+  // Source lanes must match the BUILD_VECTOR lane width and be twice as many
+  // as the result's, so the node's value has ResTy's size for the bitcast.
+  if (OrigTy.getScalarType().getSizeInBits() != BV0Bits ||
+      OrigTy.getVectorNumElements() != ResTy.getVectorNumElements() * 2)
+    return SDValue();
+
+  bool IsEven = (Index % 2 == 0);
+
+  // Rows: signed, unsigned, unsigned x signed. Columns: odd, even.
+  static const unsigned OpcTable[3][2] = {
+      {LoongArchISD::VMULWOD, LoongArchISD::VMULWEV},
+      {LoongArchISD::VMULWODU, LoongArchISD::VMULWEVU},
+      {LoongArchISD::VMULWODUS, LoongArchISD::VMULWEVUS}};
+
+  // Mixed extensions share row 2; that node takes the zero-extended source as
+  // its first operand, so swap when the sign-extended one came first.
+  unsigned Row = Ext0 != Ext1 ? 2 : (Ext0 == ISD::ZERO_EXTEND ? 1 : 0);
+  bool Swap = Ext0 == ISD::SIGN_EXTEND && Ext1 == ISD::ZERO_EXTEND;
+  SDValue Result = DAG.getNode(OpcTable[Row][IsEven], DL, OrigTy,
+                               Swap ? OrigN1 : OrigN0, Swap ? OrigN0 : OrigN1);
+  return DAG.getBitcast(ResTy, Result);
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6715,6 +6825,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+ case ISD::MUL:
+ return performMULCombine(N, DAG, DCI);
}
return SDValue();
}
@@ -7527,6 +7639,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(XVMSKEQZ)
NODE_NAME_CASE(XVMSKNEZ)
NODE_NAME_CASE(VHADDW)
+ NODE_NAME_CASE(VMULWEV)
+ NODE_NAME_CASE(VMULWOD)
+ NODE_NAME_CASE(VMULWEVU)
+ NODE_NAME_CASE(VMULWODU)
+ NODE_NAME_CASE(VMULWEVUS)
+ NODE_NAME_CASE(VMULWODUS)
}
#undef NODE_NAME_CASE
return nullptr;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c7..1e5632eb00f7b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -189,7 +189,16 @@ enum NodeType : unsigned {
XVMSKNEZ,
// Vector Horizontal Addition with Widening‌
- VHADDW
+ VHADDW,
+
+ // Perform element-wise vector multiplication at even/odd indices,
+ // and keep each result in its corresponding widened slot
+ VMULWEV,
+ VMULWOD,
+ VMULWEVU,
+ VMULWODU,
+ VMULWEVUS,
+ VMULWODUS
// Intrinsic operations end =============================================
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 5143d53bad719..7c28efd88ae09 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1328,6 +1328,39 @@ multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
}
}
+multiclass XVmulwPat<SDPatternOperator OpNode, string Inst> { // signed x signed
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_Q_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass XVmulwuPat<SDPatternOperator OpNode, string Inst> { // unsigned x unsigned
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H_BU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W_HU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D_WU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_Q_DU") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass XVmulwusPat<SDPatternOperator OpNode, string Inst> { // unsigned x signed
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H_BU_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W_HU_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D_WU_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_Q_DU_D") LASX256:$xj, LASX256:$xk)>;
+}
+
let Predicates = [HasExtLASX] in {
// XVADD_{B/H/W/D}
@@ -1365,6 +1398,14 @@ defm : PatXrXr<mul, "XVMUL">;
defm : PatXrXr<mulhs, "XVMUH">;
defm : PatXrXrU<mulhu, "XVMUH">;
+// XVMULW{EV/OD}_{H_B/W_H/D_W/Q_D}[U], XVMULW{EV/OD}_{H_BU_B/W_HU_H/D_WU_W/Q_DU_D}
+defm : XVmulwPat<loongarch_vmulwev, "XVMULWEV">;
+defm : XVmulwPat<loongarch_vmulwod, "XVMULWOD">;
+defm : XVmulwuPat<loongarch_vmulwevu, "XVMULWEV">;
+defm : XVmulwuPat<loongarch_vmulwodu, "XVMULWOD">;
+defm : XVmulwusPat<loongarch_vmulwevus, "XVMULWEV">;
+defm : XVmulwusPat<loongarch_vmulwodus, "XVMULWOD">;
+
// XVMADD_{B/H/W/D}
defm : PatXrXrXr<muladd, "XVMADD">;
// XVMSUB_{B/H/W/D}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 8d1dc99e316c9..e34f6d7e58610 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -82,6 +82,13 @@ def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>;
+def loongarch_vmulwev: SDNode<"LoongArchISD::VMULWEV", SDT_LoongArchV2R>;
+def loongarch_vmulwod: SDNode<"LoongArchISD::VMULWOD", SDT_LoongArchV2R>;
+def loongarch_vmulwevu: SDNode<"LoongArchISD::VMULWEVU", SDT_LoongArchV2R>;
+def loongarch_vmulwodu: SDNode<"LoongArchISD::VMULWODU", SDT_LoongArchV2R>;
+def loongarch_vmulwevus: SDNode<"LoongArchISD::VMULWEVUS", SDT_LoongArchV2R>;
+def loongarch_vmulwodus: SDNode<"LoongArchISD::VMULWODUS", SDT_LoongArchV2R>;
+
def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
@@ -1518,6 +1525,39 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
}
}
+multiclass VmulwPat<SDPatternOperator OpNode, string Inst> { // signed x signed
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_Q_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass VmulwuPat<SDPatternOperator OpNode, string Inst> { // unsigned x unsigned
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H_BU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W_HU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D_WU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_Q_DU") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass VmulwusPat<SDPatternOperator OpNode, string Inst> { // unsigned x signed
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H_BU_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W_HU_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D_WU_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_Q_DU_D") LSX128:$vj, LSX128:$vk)>;
+}
+
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -1555,6 +1595,14 @@ defm : PatVrVr<mul, "VMUL">;
defm : PatVrVr<mulhs, "VMUH">;
defm : PatVrVrU<mulhu, "VMUH">;
+// VMULW{EV/OD}_{H_B/W_H/D_W/Q_D}[U], VMULW{EV/OD}_{H_BU_B/W_HU_H/D_WU_W/Q_DU_D}
+defm : VmulwPat<loongarch_vmulwev, "VMULWEV">;
+defm : VmulwPat<loongarch_vmulwod, "VMULWOD">;
+defm : VmulwuPat<loongarch_vmulwevu, "VMULWEV">;
+defm : VmulwuPat<loongarch_vmulwodu, "VMULWOD">;
+defm : VmulwusPat<loongarch_vmulwevus, "VMULWEV">;
+defm : VmulwusPat<loongarch_vmulwodus, "VMULWOD">;
+
// VMADD_{B/H/W/D}
defm : PatVrVrVr<muladd, "VMADD">;
// VMSUB_{B/H/W/D}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
index c8796b839913c..605325f4dc4f4 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
@@ -5,109 +5,9 @@
define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -127,59 +27,7 @@ define void @vmulwev_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 6
-; CHECK-NEXT: vpickve2gr.h $a2, $vr2, 4
-; CHECK-NEXT: vpickve2gr.h $a3, $vr2, 2
-; CHECK-NEXT: vpickve2gr.h $a4, $vr2, 0
-; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 6
-; CHECK-NEXT: vpickve2gr.h $a6, $vr0, 4
-; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 2
-; CHECK-NEXT: vpickve2gr.h $t0, $vr0, 0
-; CHECK-NEXT: xvpermi.d $xr0, $xr1, 14
-; CHECK-NEXT: vpickve2gr.h $t1, $vr0, 6
-; CHECK-NEXT: vpickve2gr.h $t2, $vr0, 4
-; CHECK-NEXT: vpickve2gr.h $t3, $vr0, 2
-; CHECK-NEXT: vpickve2gr.h $t4, $vr0, 0
-; CHECK-NEXT: vpickve2gr.h $t5, $vr1, 6
-; CHECK-NEXT: vpickve2gr.h $t6, $vr1, 4
-; CHECK-NEXT: vpickve2gr.h $t7, $vr1, 2
-; CHECK-NEXT: vpickve2gr.h $t8, $vr1, 0
-; CHECK-NEXT: ext.w.h $t0, $t0
-; CHECK-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; CHECK-NEXT: ext.w.h $a7, $a7
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; CHECK-NEXT: ext.w.h $a6, $a6
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; CHECK-NEXT: ext.w.h $a5, $a5
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; CHECK-NEXT: ext.w.h $a4, $a4
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; CHECK-NEXT: ext.w.h $a3, $a3
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; CHECK-NEXT: ext.w.h $a2, $a2
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; CHECK-NEXT: ext.w.h $a1, $a1
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ext.w.h $a1, $t8
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t7
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t6
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t5
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: ext.w.h $a1, $t4
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t3
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t2
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t1
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -195,73 +43,13 @@ entry:
}
define void @vmulwev_d_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: srai.w $a1, $t0, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 2
-; LA32-NEXT: srai.w $a1, $a7, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a6, 0
-; LA32-NEXT: srai.w $a1, $a6, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a5, 2
-; LA32-NEXT: srai.w $a1, $a5, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -423,109 +211,9 @@ entry:
define void @vmulwod_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -545,59 +233,7 @@ define void @vmulwod_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 7
-; CHECK-NEXT: vpickve2gr.h $a2, $vr2, 5
-; CHECK-NEXT: vpickve2gr.h $a3, $vr2, 3
-; CHECK-NEXT: vpickve2gr.h $a4, $vr2, 1
-; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 7
-; CHECK-NEXT: vpickve2gr.h $a6, $vr0, 5
-; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 3
-; CHECK-NEXT: vpickve2gr.h $t0, $vr0, 1
-; CHECK-NEXT: xvpermi.d $xr0, $xr1, 14
-; CHECK-NEXT: vpickve2gr.h $t1, $vr0, 7
-; CHECK-NEXT: vpickve2gr.h $t2, $vr0, 5
-; CHECK-NEXT: vpickve2gr.h $t3, $vr0, 3
-; CHECK-NEXT: vpickve2gr.h $t4, $vr0, 1
-; CHECK-NEXT: vpickve2gr.h $t5, $vr1, 7
-; CHECK-NEXT: vpickve2gr.h $t6, $vr1, 5
-; CHECK-NEXT: vpickve2gr.h $t7, $vr1, 3
-; CHECK-NEXT: vpickve2gr.h $t8, $vr1, 1
-; CHECK-NEXT: ext.w.h $t0, $t0
-; CHECK-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; CHECK-NEXT: ext.w.h $a7, $a7
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; CHECK-NEXT: ext.w.h $a6, $a6
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; CHECK-NEXT: ext.w.h $a5, $a5
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; CHECK-NEXT: ext.w.h $a4, $a4
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; CHECK-NEXT: ext.w.h $a3, $a3
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; CHECK-NEXT: ext.w.h $a2, $a2
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; CHECK-NEXT: ext.w.h $a1, $a1
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ext.w.h $a1, $t8
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t7
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t6
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t5
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; CHECK-NEXT: ext.w.h $a1, $t4
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; CHECK-NEXT: ext.w.h $a1, $t3
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; CHECK-NEXT: ext.w.h $a1, $t2
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; CHECK-NEXT: ext.w.h $a1, $t1
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -613,73 +249,13 @@ entry:
}
define void @vmulwod_d_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: srai.w $a1, $t0, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 2
-; LA32-NEXT: srai.w $a1, $a7, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a6, 0
-; LA32-NEXT: srai.w $a1, $a6, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a5, 2
-; LA32-NEXT: srai.w $a1, $a5, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -841,109 +417,9 @@ entry:
define void @vmulwev_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -959,125 +435,13 @@ entry:
}
define void @vmulwev_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_w_hu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_w_hu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -1091,63 +455,13 @@ entry:
}
define void @vmulwev_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_wu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvrepli.b $xr1, 0
-; LA32-NEXT: xvld $xr2, $a2, 0
-; LA32-NEXT: xvori.b $xr3, $xr1, 0
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr4, $xr0, 2
-; LA32-NEXT: xvinsve0.w $xr3, $xr4, 2
-; LA32-NEXT: xvpickve.w $xr4, $xr0, 4
-; LA32-NEXT: xvinsve0.w $xr3, $xr4, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 6
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 6
-; LA32-NEXT: xvinsve0.w $xr1, $xr2, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr2, 2
-; LA32-NEXT: xvinsve0.w $xr1, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr2, 4
-; LA32-NEXT: xvinsve0.w $xr1, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr2, 6
-; LA32-NEXT: xvinsve0.w $xr1, $xr0, 6
-; LA32-NEXT: xvmul.d $xr0, $xr3, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_wu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -1255,109 +569,9 @@ entry:
define void @vmulwod_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_bu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1373,125 +587,13 @@ entry:
}
define void @vmulwod_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_w_hu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_w_hu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -1505,65 +607,13 @@ entry:
}
define void @vmulwod_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_wu:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: xvpickve.w $xr3, $xr0, 1
-; LA32-NEXT: xvori.b $xr4, $xr2, 0
-; LA32-NEXT: xvinsve0.w $xr4, $xr3, 0
-; LA32-NEXT: xvpickve.w $xr3, $xr0, 3
-; LA32-NEXT: xvinsve0.w $xr4, $xr3, 2
-; LA32-NEXT: xvpickve.w $xr3, $xr0, 5
-; LA32-NEXT: xvinsve0.w $xr4, $xr3, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr0, 7
-; LA32-NEXT: xvinsve0.w $xr4, $xr0, 6
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 1
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 3
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 5
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 7
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 6
-; LA32-NEXT: xvmul.d $xr0, $xr4, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_wu:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -1671,109 +721,9 @@ entry:
define void @vmulwev_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1789,125 +739,13 @@ entry:
}
define void @vmulwev_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_w_hu_h:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: ext.w.h $a1, $t8
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: ext.w.h $a1, $t4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_w_hu_h:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: ext.w.h $a1, $t8
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t7
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t6
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t5
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: ext.w.h $a1, $t4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t3
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t2
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t1
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_w_hu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -1921,68 +759,13 @@ entry:
}
define void @vmulwev_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_wu_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvld $xr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA32-NEXT: xvrepli.b $xr0, 0
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 0
-; LA32-NEXT: xvpickve.w $xr2, $xr1, 2
-; LA32-NEXT: xvinsve0.w $xr0, $xr2, 2
-; LA32-NEXT: xvpickve.w $xr2, $xr1, 4
-; LA32-NEXT: xvinsve0.w $xr0, $xr2, 4
-; LA32-NEXT: xvpickve.w $xr1, $xr1, 6
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvmul.d $xr0, $xr0, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_wu_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_wu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -2122,109 +905,9 @@ entry:
define void @vmulwod_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_bu_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -2240,125 +923,13 @@ entry:
}
define void @vmulwod_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_w_hu_h:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA32-NEXT: bstrpick.w $t0, $t0, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: bstrpick.w $a7, $a7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: bstrpick.w $a6, $a6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: bstrpick.w $a5, $a5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: bstrpick.w $a4, $a4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: bstrpick.w $a3, $a3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: bstrpick.w $a2, $a2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: ext.w.h $a1, $t8
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: ext.w.h $a1, $t4
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: ext.w.h $a1, $t3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: ext.w.h $a1, $t2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: ext.w.h $a1, $t1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_w_hu_h:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA64-NEXT: bstrpick.d $t0, $t0, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: bstrpick.d $a7, $a7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: ext.w.h $a1, $t8
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t7
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t6
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t5
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: ext.w.h $a1, $t4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: ext.w.h $a1, $t3
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: ext.w.h $a1, $t2
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: ext.w.h $a1, $t1
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_w_hu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -2372,69 +943,13 @@ entry:
}
define void @vmulwod_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_wu_w:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvld $xr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 1
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 3
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 5
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 7
-; LA32-NEXT: xvinsve0.w $xr2, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA32-NEXT: xvmul.d $xr0, $xr2, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_wu_w:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: bstrpick.d $a4, $a4, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: bstrpick.d $a3, $a3, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: bstrpick.d $a2, $a2, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr0, $t0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a7, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a6, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a5, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_wu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -2574,109 +1089,9 @@ entry:
define void @vmulwev_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu_b_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr1, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -2692,125 +1107,13 @@ entry:
}
define void @vmulwev_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_w_hu_h_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA32-NEXT: ext.w.h $t0, $t0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: ext.w.h $a7, $a7
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: ext.w.h $a6, $a6
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: ext.w.h $a5, $a5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: ext.w.h $a1, $a1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_w_hu_h_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 6
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 4
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 2
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 0
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 0
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 6
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 4
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 2
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 0
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 6
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 4
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 2
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 0
-; LA64-NEXT: ext.w.h $t0, $t0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: ext.w.h $a7, $a7
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: ext.w.h $a6, $a6
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: ext.w.h $a5, $a5
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: ext.w.h $a4, $a4
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: ext.w.h $a3, $a3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: ext.w.h $a2, $a2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_w_hu_h_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -2824,68 +1127,13 @@ entry:
}
define void @vmulwev_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_d_wu_w_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvrepli.b $xr0, 0
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 0
-; LA32-NEXT: xvpickve.w $xr3, $xr1, 2
-; LA32-NEXT: xvinsve0.w $xr0, $xr3, 2
-; LA32-NEXT: xvpickve.w $xr3, $xr1, 4
-; LA32-NEXT: xvinsve0.w $xr0, $xr3, 4
-; LA32-NEXT: xvpickve.w $xr1, $xr1, 6
-; LA32-NEXT: xvinsve0.w $xr0, $xr1, 6
-; LA32-NEXT: xvmul.d $xr0, $xr2, $xr0
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_d_wu_w_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 6
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 4
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 0
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 6
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 4
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_d_wu_w_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -3025,109 +1273,9 @@ entry:
define void @vmulwod_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwod_h_bu_b_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr3, $a1, 0
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 15
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
-; CHECK-NEXT: ext.w.b $a1, $a1
-; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 1
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 3
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 5
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 7
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 9
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 11
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 13
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
-; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 15
-; CHECK-NEXT: andi $a1, $a1, 255
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2
-; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr1, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -3143,125 +1291,13 @@ entry:
}
define void @vmulwod_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_w_hu_h_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA32-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA32-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA32-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA32-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA32-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA32-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA32-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA32-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA32-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA32-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA32-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA32-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA32-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA32-NEXT: ext.w.h $t0, $t0
-; LA32-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA32-NEXT: ext.w.h $a7, $a7
-; LA32-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA32-NEXT: ext.w.h $a6, $a6
-; LA32-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA32-NEXT: ext.w.h $a5, $a5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA32-NEXT: ext.w.h $a4, $a4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA32-NEXT: ext.w.h $a3, $a3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: ext.w.h $a2, $a2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: ext.w.h $a1, $a1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA32-NEXT: bstrpick.w $a1, $t8, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t7, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t6, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t5, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: bstrpick.w $a1, $t4, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: bstrpick.w $a1, $t3, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: bstrpick.w $a1, $t2, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: bstrpick.w $a1, $t1, 15, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_w_hu_h_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpermi.d $xr2, $xr0, 14
-; LA64-NEXT: vpickve2gr.h $a1, $vr2, 7
-; LA64-NEXT: vpickve2gr.h $a2, $vr2, 5
-; LA64-NEXT: vpickve2gr.h $a3, $vr2, 3
-; LA64-NEXT: vpickve2gr.h $a4, $vr2, 1
-; LA64-NEXT: vpickve2gr.h $a5, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $a6, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $a7, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t0, $vr0, 1
-; LA64-NEXT: xvpermi.d $xr0, $xr1, 14
-; LA64-NEXT: vpickve2gr.h $t1, $vr0, 7
-; LA64-NEXT: vpickve2gr.h $t2, $vr0, 5
-; LA64-NEXT: vpickve2gr.h $t3, $vr0, 3
-; LA64-NEXT: vpickve2gr.h $t4, $vr0, 1
-; LA64-NEXT: vpickve2gr.h $t5, $vr1, 7
-; LA64-NEXT: vpickve2gr.h $t6, $vr1, 5
-; LA64-NEXT: vpickve2gr.h $t7, $vr1, 3
-; LA64-NEXT: vpickve2gr.h $t8, $vr1, 1
-; LA64-NEXT: ext.w.h $t0, $t0
-; LA64-NEXT: vinsgr2vr.w $vr0, $t0, 0
-; LA64-NEXT: ext.w.h $a7, $a7
-; LA64-NEXT: vinsgr2vr.w $vr0, $a7, 1
-; LA64-NEXT: ext.w.h $a6, $a6
-; LA64-NEXT: vinsgr2vr.w $vr0, $a6, 2
-; LA64-NEXT: ext.w.h $a5, $a5
-; LA64-NEXT: vinsgr2vr.w $vr0, $a5, 3
-; LA64-NEXT: ext.w.h $a4, $a4
-; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 0
-; LA64-NEXT: ext.w.h $a3, $a3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA64-NEXT: ext.w.h $a2, $a2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
-; LA64-NEXT: bstrpick.d $a1, $t8, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t7, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t6, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t5, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: bstrpick.d $a1, $t4, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $t3, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $t2, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: bstrpick.d $a1, $t1, 15, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvmul.w $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_w_hu_h_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
%vb = load <16 x i16>, ptr %b
@@ -3275,69 +1311,13 @@ entry:
}
define void @vmulwod_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_d_wu_w_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 0
-; LA32-NEXT: srai.w $a4, $a4, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: srai.w $a2, $a2, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: srai.w $a1, $a1, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 1
-; LA32-NEXT: xvrepli.b $xr3, 0
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 0
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 3
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 2
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 5
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 4
-; LA32-NEXT: xvpickve.w $xr0, $xr1, 7
-; LA32-NEXT: xvinsve0.w $xr3, $xr0, 6
-; LA32-NEXT: xvmul.d $xr0, $xr2, $xr3
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_d_wu_w_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
-; LA64-NEXT: xvpickve2gr.w $a5, $xr1, 3
-; LA64-NEXT: xvpickve2gr.w $a6, $xr1, 1
-; LA64-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA64-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA64-NEXT: vinsgr2vr.d $vr0, $a4, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a3, 1
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
-; LA64-NEXT: bstrpick.d $a1, $t0, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a7, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; LA64-NEXT: bstrpick.d $a1, $a6, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: bstrpick.d $a1, $a5, 31, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvmul.d $xr0, $xr1, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_d_wu_w_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
index cd83c1dff652f..19b5ab50eef95 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll
@@ -7,11 +7,7 @@ define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vslli.h $vr1, $vr1, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -31,11 +27,7 @@ define void @vmulwev_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -55,11 +47,7 @@ define void @vmulwev_d_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.d.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -155,13 +143,7 @@ define void @vmulwod_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 49
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vshuf4i.b $vr1, $vr1, 49
-; CHECK-NEXT: vslli.h $vr1, $vr1, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -181,13 +163,7 @@ define void @vmulwod_w_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 49
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vshuf4i.h $vr1, $vr1, 49
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -207,13 +183,7 @@ define void @vmulwod_d_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 49
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 49
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -308,13 +278,8 @@ define void @vmulwev_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr3, $vr0, $vr1
-; CHECK-NEXT: vshuf.b $vr1, $vr3, $vr2, $vr1
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -332,15 +297,9 @@ entry:
define void @vmulwev_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_w_hu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vori.b $vr4, $vr0, 0
-; CHECK-NEXT: vshuf.h $vr4, $vr3, $vr1
-; CHECK-NEXT: vshuf.h $vr0, $vr3, $vr2
-; CHECK-NEXT: vmul.w $vr0, $vr4, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -358,15 +317,9 @@ entry:
define void @vmulwev_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_d_wu:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vori.b $vr4, $vr0, 0
-; CHECK-NEXT: vshuf.w $vr4, $vr3, $vr1
-; CHECK-NEXT: vshuf.w $vr0, $vr3, $vr2
-; CHECK-NEXT: vmul.d $vr0, $vr4, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -442,10 +395,7 @@ define void @vmulwod_h_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vpackod.b $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -465,10 +415,7 @@ define void @vmulwod_w_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vpackod.h $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -488,10 +435,7 @@ define void @vmulwod_d_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vpackod.w $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -566,14 +510,8 @@ define void @vmulwev_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_bu_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr3, $vr0, $vr1
-; CHECK-NEXT: vslli.h $vr1, $vr2, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -592,14 +530,8 @@ define void @vmulwev_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_w_hu_h:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr3, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr2, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -618,14 +550,8 @@ define void @vmulwev_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_d_wu_w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI18_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI18_0)
-; CHECK-NEXT: vld $vr2, $a2, 0
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr3, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr2, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -714,12 +640,7 @@ define void @vmulwod_h_bu_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vshuf4i.b $vr1, $vr1, 49
-; CHECK-NEXT: vslli.h $vr1, $vr1, 8
-; CHECK-NEXT: vsrai.h $vr1, $vr1, 8
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -739,12 +660,7 @@ define void @vmulwod_w_hu_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vshuf4i.h $vr1, $vr1, 49
-; CHECK-NEXT: vslli.w $vr1, $vr1, 16
-; CHECK-NEXT: vsrai.w $vr1, $vr1, 16
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -764,12 +680,7 @@ define void @vmulwod_d_wu_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 49
-; CHECK-NEXT: vslli.d $vr1, $vr1, 32
-; CHECK-NEXT: vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -858,13 +769,7 @@ define void @vmulwev_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI24_0)
-; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI24_0)
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.b $vr1, $vr3, $vr1, $vr2
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -884,13 +789,7 @@ define void @vmulwev_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0)
-; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI25_0)
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.h $vr2, $vr3, $vr1
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr2
+; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -910,13 +809,7 @@ define void @vmulwev_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI26_0)
-; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI26_0)
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vrepli.b $vr3, 0
-; CHECK-NEXT: vshuf.w $vr2, $vr3, $vr1
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr2
+; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1005,12 +898,7 @@ define void @vmulwod_h_bu_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 49
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.b $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1030,12 +918,7 @@ define void @vmulwod_w_hu_h_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 49
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.h $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -1055,12 +938,7 @@ define void @vmulwod_d_wu_w_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 49
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vpackod.w $vr1, $vr2, $vr1
-; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
More information about the llvm-branch-commits mailing list