[llvm-branch-commits] [llvm] [LoongArch] Custom legalize vector_shuffle to `xvinsve0.{w/d}` when possible (PR #160857)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Sep 26 04:15:01 PDT 2025
https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/160857
Custom-legalize a 256-bit vector_shuffle whose mask keeps all lanes of one source except a single lane taken from the lowest element of the other source, so it selects to a single `xvinsve0.{w/d}` instead of a constant-pool mask plus `xvshuf.{w/d}` or a lane-by-lane rebuild through `xvpickve2gr`/`vinsgr2vr`.
From f0b8917ccc6109e64d16f2bc4966f2696843b5e7 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Fri, 26 Sep 2025 19:07:22 +0800
Subject: [PATCH] [LoongArch] Custom legalize vector_shuffle to
`xvinsve0.{w/d}` when possible
---
.../LoongArch/LoongArchISelLowering.cpp | 52 ++
.../Target/LoongArch/LoongArchISelLowering.h | 1 +
.../LoongArch/LoongArchLASXInstrInfo.td | 9 +
.../ir-instruction/shuffle-as-xvinsve0.ll | 704 +++---------------
4 files changed, 180 insertions(+), 586 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5d4a8fd080202..194f42995d55a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2317,6 +2317,54 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}
+// Check that exactly one element of Mask equals 'Replaced' while every other
+// element is either 'Base + i' or undef (-1). On success, return the index
+// of the replaced element; otherwise return -1.
+static int checkReplaceOne(ArrayRef<int> Mask, int Base, int Replaced) {
+ int MaskSize = Mask.size();
+ int Idx = -1;
+ for (int i = 0; i < MaskSize; ++i) {
+ if (Mask[i] == Base + i || Mask[i] == -1)
+ continue;
+ if (Mask[i] != Replaced)
+ return -1;
+ if (Idx == -1)
+ Idx = i;
+ else
+ return -1;
+ }
+ return Idx;
+}
+
+/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
+static SDValue
+lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ // LoongArch LASX only supports xvinsve0.{w/d}.
+ if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
+ VT != MVT::v4f64)
+ return SDValue();
+
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ int MaskSize = Mask.size();
+ assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ // Case 1: the lowest element of V2 replaces one element in V1.
+ int Idx = checkReplaceOne(Mask, 0, MaskSize);
+ if (Idx != -1)
+ return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
+ DAG.getConstant(Idx, DL, GRLenVT));
+
+ // Case 2: the lowest element of V1 replaces one element in V2.
+ Idx = checkReplaceOne(Mask, MaskSize, 0);
+ if (Idx != -1)
+ return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
+ DAG.getConstant(Idx, DL, GRLenVT));
+
+ return SDValue();
+}
+
/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
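To make the accepted mask shapes concrete, here is a standalone sketch (not part of the patch) that mirrors the checkReplaceOne logic above and exercises it on illustrative v8i32 masks; the xvinsve0.w operands in the comments follow the Case 1/Case 2 calls above:

// Standalone sketch mirroring checkReplaceOne; compiles with any C++11 compiler.
#include <cassert>
#include <vector>

static int checkReplaceOne(const std::vector<int> &Mask, int Base, int Replaced) {
  int Idx = -1;
  for (int i = 0, e = (int)Mask.size(); i < e; ++i) {
    if (Mask[i] == Base + i || Mask[i] == -1)
      continue;                // lane kept from the base vector, or undef
    if (Mask[i] != Replaced || Idx != -1)
      return -1;               // foreign lane, or a second replacement
    Idx = i;
  }
  return Idx;
}

int main() {
  // Case 1 shape: lane 3 of V1 is replaced by lane 0 of V2 (mask value 8
  // == MaskSize). Lowers to: xvinsve0.w V1, V2, 3
  assert(checkReplaceOne({0, 1, 2, 8, 4, 5, 6, 7}, /*Base=*/0, /*Replaced=*/8) == 3);
  // Case 2 shape: lane 5 of V2 is replaced by lane 0 of V1 (mask value 0).
  // Lowers to: xvinsve0.w V2, V1, 5
  assert(checkReplaceOne({8, 9, 10, 11, 12, 0, 14, 15}, /*Base=*/8, /*Replaced=*/0) == 5);
  // Undef lanes are tolerated; two replacements are not.
  assert(checkReplaceOne({8, 1, -1, 3, 4, 5, 6, 7}, 0, 8) == 0);
  assert(checkReplaceOne({8, 8, 2, 3, 4, 5, 6, 7}, 0, 8) == -1);
  return 0;
}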
@@ -2593,6 +2641,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
Zeroable)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
Subtarget)))
return Result;
@@ -7450,6 +7501,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(XVPERM)
NODE_NAME_CASE(XVREPLVE0)
NODE_NAME_CASE(XVREPLVE0Q)
+ NODE_NAME_CASE(XVINSVE0)
NODE_NAME_CASE(VPICK_SEXT_ELT)
NODE_NAME_CASE(VPICK_ZEXT_ELT)
NODE_NAME_CASE(VREPLVE)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index b2fccf59169ff..3e7ea5ebba79e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -151,6 +151,7 @@ enum NodeType : unsigned {
XVPERM,
XVREPLVE0,
XVREPLVE0Q,
+ XVINSVE0,
// Extended vector element extraction
VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..dfcbfff2a9a72 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -20,6 +20,7 @@ def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>;
def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>;
def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>;
+def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>;
def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1708,6 +1709,14 @@ def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm),
(XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>;
// XVINSVE0_{W/D}
+def : Pat<(loongarch_xvinsve0 v8i32:$xd, v8i32:$xj, uimm3:$imm),
+ (XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4i64:$xd, v4i64:$xj, uimm2:$imm),
+ (XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
+def : Pat<(loongarch_xvinsve0 v8f32:$xd, v8f32:$xj, uimm3:$imm),
+ (XVINSVE0_W v8f32:$xd, v8f32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4f64:$xd, v4f64:$xj, uimm2:$imm),
+ (XVINSVE0_D v4f64:$xd, v4f64:$xj, uimm2:$imm)>;
def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
(XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32),
uimm3:$imm)>;
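These four patterns map the new node onto the existing XVINSVE0_W/XVINSVE0_D instruction definitions for both integer and floating-point element types; the immediate is the destination lane index (uimm3 for eight 32-bit lanes, uimm2 for four 64-bit lanes). A minimal IR sketch of a shuffle that now reaches them (the function name is illustrative; the mask mirrors the xvinsve0_v8i32_l_0 test below, where index 8 selects element 0 of %b):

define <8 x i32> @replace_lane0(<8 x i32> %a, <8 x i32> %b) {
entry:
  %r = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %r
}
; With +lasx this selects to roughly: xvinsve0.w $xr0, $xr1, 0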
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll
index b6c9c4da05e5a..d5a7dbf5d57af 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
;; xvinsve0.w
define void @xvinsve0_v8i32_l_0(ptr %d, ptr %a, ptr %b) nounwind {
@@ -8,10 +8,8 @@ define void @xvinsve0_v8i32_l_0(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -26,10 +24,8 @@ define void @xvinsve0_v8i32_l_1(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -44,10 +40,8 @@ define void @xvinsve0_v8i32_l_2(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -62,10 +56,8 @@ define void @xvinsve0_v8i32_l_3(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -76,52 +68,13 @@ entry:
}
define void @xvinsve0_v8i32_l_4(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_l_4:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ld.w $a2, $a2, 0
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_l_4:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 5
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 6
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 7
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 2
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 3
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_l_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 4
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -131,52 +84,13 @@ entry:
}
define void @xvinsve0_v8i32_l_5(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_l_5:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: ld.w $a1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_l_5:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvst $xr1, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_l_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 5
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -186,52 +100,13 @@ entry:
}
define void @xvinsve0_v8i32_l_6(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_l_6:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: ld.w $a2, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_l_6:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvst $xr1, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_l_6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 6
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -241,52 +116,13 @@ entry:
}
define void @xvinsve0_v8i32_l_7(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_l_7:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: ld.w $a2, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_l_7:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvst $xr1, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_l_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 7
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -300,10 +136,8 @@ define void @xvinsve0_v8f32_l(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x float>, ptr %a
@@ -318,10 +152,8 @@ define void @xvinsve0_v8i32_h_0(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 0
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -336,10 +168,8 @@ define void @xvinsve0_v8i32_h_1(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 1
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -354,10 +184,8 @@ define void @xvinsve0_v8i32_h_2(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 2
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -372,10 +200,8 @@ define void @xvinsve0_v8i32_h_3(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 3
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -386,52 +212,13 @@ entry:
}
define void @xvinsve0_v8i32_h_4(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_h_4:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ld.w $a1, $a1, 0
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_h_4:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 5
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 6
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 7
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 2
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 3
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_h_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 4
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -441,52 +228,13 @@ entry:
}
define void @xvinsve0_v8i32_h_5(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_h_5:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: ld.w $a1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_h_5:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvst $xr1, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_h_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 5
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -496,52 +244,13 @@ entry:
}
define void @xvinsve0_v8i32_h_6(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_h_6:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: ld.w $a1, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_h_6:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvst $xr1, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_h_6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 6
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -551,52 +260,13 @@ entry:
}
define void @xvinsve0_v8i32_h_7(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_h_7:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 5
-; LA32-NEXT: ld.w $a1, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_h_7:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvst $xr1, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_h_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 7
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -610,10 +280,8 @@ define void @xvinsve0_v8f32_h(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 0
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x float>, ptr %a
@@ -629,10 +297,8 @@ define void @xvinsve0_v4i64_l_0(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI18_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI18_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
@@ -647,10 +313,8 @@ define void @xvinsve0_v4i64_l_1(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI19_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI19_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
@@ -661,44 +325,13 @@ entry:
}
define void @xvinsve0_v4i64_l_2(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v4i64_l_2:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvld $xr1, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 6
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 7
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v4i64_l_2:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a2, $a2, 0
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v4i64_l_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -708,44 +341,13 @@ entry:
}
define void @xvinsve0_v4i64_l_3(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v4i64_l_3:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvst $xr1, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v4i64_l_3:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: ld.d $a1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v4i64_l_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -759,10 +361,8 @@ define void @xvinsve0_v4f64_l(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI22_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI22_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x double>, ptr %a
@@ -777,10 +377,8 @@ define void @xvinsve0_v4i64_h_0(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI23_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI23_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 0
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
@@ -795,10 +393,8 @@ define void @xvinsve0_v4i64_h_1(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI24_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI24_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 1
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
@@ -809,44 +405,13 @@ entry:
}
define void @xvinsve0_v4i64_h_2(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v4i64_h_2:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 6
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 7
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v4i64_h_2:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 0
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v4i64_h_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 2
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -856,44 +421,13 @@ entry:
}
define void @xvinsve0_v4i64_h_3(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v4i64_h_3:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: xvld $xr1, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA32-NEXT: xvst $xr1, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v4i64_h_3:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: ld.d $a1, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v4i64_h_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 3
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -907,10 +441,8 @@ define void @xvinsve0_v4f64_h(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI27_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI27_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 0
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x double>, ptr %a