[llvm] [LoongArch] Custom lower vector trunc to vector shuffle (PR #130938)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 12 02:59:59 PDT 2025


https://github.com/tangaac created https://github.com/llvm/llvm-project/pull/130938

None

>From 333a94557d35c587f507ee1b9004a26226b7a520 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Mon, 10 Mar 2025 14:11:22 +0800
Subject: [PATCH 1/2] test for vector-trunc

---
 llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll | 238 +++++++++++++++++++
 1 file changed, 238 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll

diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
new file mode 100644
index 0000000000000..12f6c30b1512e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_2i64_to_2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT:    st.w $a0, $sp, 4
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    st.w $a0, $sp, 0
+; CHECK-NEXT:    ld.d $a0, $sp, 0
+; CHECK-NEXT:    st.d $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <2 x i64>, ptr %ptr
+	%trunc = trunc <2 x i64> %a to <2 x i32>
+	store <2 x i32> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_2i64_to_2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT:    st.h $a0, $sp, 2
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    st.h $a0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.w $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <2 x i64>, ptr %ptr
+	%trunc = trunc <2 x i64> %a to <2 x i16>
+	store <2 x i16> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_2i64_to_2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT:    st.b $a0, $sp, 1
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    st.b $a0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    st.h $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <2 x i64>, ptr %ptr
+	%trunc = trunc <2 x i64> %a to <2 x i8>
+	store <2 x i8> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_4i32_to_4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; CHECK-NEXT:    st.h $a0, $sp, 6
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; CHECK-NEXT:    st.h $a0, $sp, 4
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; CHECK-NEXT:    st.h $a0, $sp, 2
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.h $a0, $sp, 0
+; CHECK-NEXT:    ld.d $a0, $sp, 0
+; CHECK-NEXT:    st.d $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <4 x i32>, ptr %ptr
+	%trunc = trunc <4 x i32> %a to <4 x i16>
+	store <4 x i16> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_4i32_to_4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; CHECK-NEXT:    st.b $a0, $sp, 3
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; CHECK-NEXT:    st.b $a0, $sp, 2
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; CHECK-NEXT:    st.b $a0, $sp, 1
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.b $a0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.w $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <4 x i32>, ptr %ptr
+	%trunc = trunc <4 x i32> %a to <4 x i8>
+	store <4 x i8> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_8i16_to_8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; CHECK-NEXT:    st.b $a0, $sp, 7
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; CHECK-NEXT:    st.b $a0, $sp, 6
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; CHECK-NEXT:    st.b $a0, $sp, 5
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; CHECK-NEXT:    st.b $a0, $sp, 4
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; CHECK-NEXT:    st.b $a0, $sp, 3
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; CHECK-NEXT:    st.b $a0, $sp, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; CHECK-NEXT:    st.b $a0, $sp, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    st.b $a0, $sp, 0
+; CHECK-NEXT:    ld.d $a0, $sp, 0
+; CHECK-NEXT:    st.d $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <8 x i16>, ptr %ptr
+	%trunc = trunc <8 x i16> %a to <8 x i8>
+	store <8 x i8> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_2i32_to_2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; CHECK-NEXT:    st.h $a0, $sp, 2
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.h $a0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.w $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <2 x i32>, ptr %ptr
+	%trunc = trunc <2 x i32> %a to <2 x i16>
+	store <2 x i16> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_2i32_to_2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; CHECK-NEXT:    st.b $a0, $sp, 1
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.b $a0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    st.h $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <2 x i32>, ptr %ptr
+	%trunc = trunc <2 x i32> %a to <2 x i8>
+	store <2 x i8> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_4i16_to_4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; CHECK-NEXT:    st.b $a0, $sp, 3
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; CHECK-NEXT:    st.b $a0, $sp, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; CHECK-NEXT:    st.b $a0, $sp, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    st.b $a0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    st.w $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <4 x i16>, ptr %ptr
+	%trunc = trunc <4 x i16> %a to <4 x i8>
+	store <4 x i8> %trunc, ptr %dst
+	ret void
+}
+
+define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_trunc_2i16_to_2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; CHECK-NEXT:    st.b $a0, $sp, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    st.b $a0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    st.h $a0, $a1, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+	%a = load <2 x i16>, ptr %ptr
+	%trunc = trunc <2 x i16> %a to <2 x i8>
+	store <2 x i8> %trunc, ptr %dst
+	ret void
+}
+

>From 84c4107867087e1efb5e704981b4b43be64ac01f Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 12 Mar 2025 17:52:33 +0800
Subject: [PATCH 2/2] Custom lower vector trunc to vector shuffle

---
 .../LoongArch/LoongArchISelLowering.cpp       |  44 ++++++
 llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll  | 127 +++---------------
 2 files changed, 66 insertions(+), 105 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index dceb3c682d2df..cba75d6885381 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -297,6 +297,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
     setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
     setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
+
+    for (MVT VT :
+         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
+          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
+      setOperationAction(ISD::TRUNCATE, VT, Custom);
+    }
   }
 
   // Set operations for 'LASX' feature.
@@ -3264,6 +3270,44 @@ void LoongArchTargetLowering::ReplaceNodeResults(
     replaceCMP_XCHG_128Results(N, Results, DAG);
     break;
   }
+  case ISD::TRUNCATE: {
+    // Only truncates whose result type is widened are lowered to a shuffle.
+    MVT VT = N->getSimpleValueType(0);
+    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+      return;
+
+    MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
+    SDValue In = N->getOperand(0);
+    EVT InVT = In.getValueType();
+    EVT InEltVT = InVT.getVectorElementType();
+    EVT EltVT = VT.getVectorElementType();
+    unsigned MinElts = VT.getVectorNumElements();
+    unsigned WidenNumElts = WidenVT.getVectorNumElements();
+    unsigned InBits = InVT.getSizeInBits();
+
+    if ((128 % InBits) == 0 && WidenVT.is128BitVector() &&
+        (InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
+      int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
+      SmallVector<int, 16> TruncMask(WidenNumElts, -1);
+      for (unsigned I = 0; I < MinElts; ++I)
+        TruncMask[I] = Scale * I;
+
+      // Pad the input with undef to 128 bits, then reinterpret it as WidenVT.
+      unsigned WidenInNumElts = 128 / In.getScalarValueSizeInBits();
+      MVT InSVT = In.getSimpleValueType().getScalarType();
+      MVT WidenInVT = MVT::getVectorVT(InSVT, WidenInNumElts);
+      SDValue WidenIn = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WidenInVT,
+                                    DAG.getUNDEF(WidenInVT), In,
+                                    DAG.getVectorIdxConstant(0, DL));
+      assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
+             "Illegal vector type in truncation");
+      WidenIn = DAG.getBitcast(WidenVT, WidenIn);
+      Results.push_back(
+          DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
+      return;
+    }
+    break;
+  }
   }
 }
 
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
index 12f6c30b1512e..80226e356f38c 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -4,16 +4,10 @@
 define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_2i64_to_2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT:    st.w $a0, $sp, 4
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 8
 ; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT:    st.w $a0, $sp, 0
-; CHECK-NEXT:    ld.d $a0, $sp, 0
 ; CHECK-NEXT:    st.d $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <2 x i64>, ptr %ptr
 	%trunc = trunc <2 x i64> %a to <2 x i32>
@@ -24,17 +18,12 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) {
 define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_2i64_to_2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT:    st.h $a0, $sp, 2
-; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT:    st.h $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT:    vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr1, 0
 ; CHECK-NEXT:    st.w $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <2 x i64>, ptr %ptr
 	%trunc = trunc <2 x i64> %a to <2 x i16>
@@ -45,17 +34,12 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) {
 define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_2i64_to_2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT:    st.b $a0, $sp, 1
-; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT:    st.b $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
 ; CHECK-NEXT:    st.h $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <2 x i64>, ptr %ptr
 	%trunc = trunc <2 x i64> %a to <2 x i8>
@@ -66,20 +50,10 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) {
 define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_4i32_to_4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
-; CHECK-NEXT:    st.h $a0, $sp, 6
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 2
-; CHECK-NEXT:    st.h $a0, $sp, 4
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; CHECK-NEXT:    st.h $a0, $sp, 2
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT:    st.h $a0, $sp, 0
-; CHECK-NEXT:    ld.d $a0, $sp, 0
+; CHECK-NEXT:    vpickev.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; CHECK-NEXT:    st.d $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <4 x i32>, ptr %ptr
 	%trunc = trunc <4 x i32> %a to <4 x i16>
@@ -90,21 +64,12 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) {
 define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_4i32_to_4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
-; CHECK-NEXT:    st.b $a0, $sp, 3
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 2
-; CHECK-NEXT:    st.b $a0, $sp, 2
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; CHECK-NEXT:    st.b $a0, $sp, 1
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT:    st.b $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; CHECK-NEXT:    st.w $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <4 x i32>, ptr %ptr
 	%trunc = trunc <4 x i32> %a to <4 x i8>
@@ -115,28 +80,10 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) {
 define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_8i16_to_8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT:    st.b $a0, $sp, 7
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT:    st.b $a0, $sp, 6
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT:    st.b $a0, $sp, 5
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT:    st.b $a0, $sp, 4
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT:    st.b $a0, $sp, 3
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT:    st.b $a0, $sp, 2
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT:    st.b $a0, $sp, 1
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT:    st.b $a0, $sp, 0
-; CHECK-NEXT:    ld.d $a0, $sp, 0
+; CHECK-NEXT:    vpickev.b $vr0, $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; CHECK-NEXT:    st.d $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <8 x i16>, ptr %ptr
 	%trunc = trunc <8 x i16> %a to <8 x i8>
@@ -147,18 +94,11 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) {
 define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_2i32_to_2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ld.d $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; CHECK-NEXT:    st.h $a0, $sp, 2
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT:    st.h $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vshuf4i.h $vr0, $vr0, 8
 ; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; CHECK-NEXT:    st.w $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <2 x i32>, ptr %ptr
 	%trunc = trunc <2 x i32> %a to <2 x i16>
@@ -169,18 +109,13 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) {
 define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_2i32_to_2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ld.d $a0, $a0, 0
-; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
-; CHECK-NEXT:    st.b $a0, $sp, 1
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT:    st.b $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
 ; CHECK-NEXT:    st.h $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <2 x i32>, ptr %ptr
 	%trunc = trunc <2 x i32> %a to <2 x i8>
@@ -191,22 +126,11 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) {
 define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_4i16_to_4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ld.d $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT:    st.b $a0, $sp, 3
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT:    st.b $a0, $sp, 2
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT:    st.b $a0, $sp, 1
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT:    st.b $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vpickev.b $vr0, $vr0, $vr0
 ; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; CHECK-NEXT:    st.w $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <4 x i16>, ptr %ptr
 	%trunc = trunc <4 x i16> %a to <4 x i8>
@@ -217,18 +141,11 @@ define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) {
 define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_trunc_2i16_to_2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi.d $sp, $sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT:    st.b $a0, $sp, 1
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT:    st.b $a0, $sp, 0
-; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 8
 ; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
 ; CHECK-NEXT:    st.h $a0, $a1, 0
-; CHECK-NEXT:    addi.d $sp, $sp, 16
 ; CHECK-NEXT:    ret
 	%a = load <2 x i16>, ptr %ptr
 	%trunc = trunc <2 x i16> %a to <2 x i8>



More information about the llvm-commits mailing list