[llvm] [LoongArch] Custom lower vector trunc to vector shuffle (PR #130938)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 13 01:08:03 PDT 2025
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/130938
>From a6bb03a4dfbe70989c4a89af9f5b0115a625a325 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 12 Mar 2025 17:52:33 +0800
Subject: [PATCH] Custom lower vector trunc to vector shuffle
---
.../LoongArch/LoongArchISelLowering.cpp | 44 ++++++++
llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll | 106 ++++++++----------
2 files changed, 88 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index dceb3c682d2df..cba75d6885381 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -297,6 +297,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
+
+ for (MVT VT :
+ {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
+ MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ }
}
// Set operations for 'LASX' feature.
@@ -3264,6 +3270,44 @@ void LoongArchTargetLowering::ReplaceNodeResults(
replaceCMP_XCHG_128Results(N, Results, DAG);
break;
}
+ case ISD::TRUNCATE: {
+ MVT VT = N->getSimpleValueType(0);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned MinElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned InBits = InVT.getSizeInBits();
+
+ if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
+ if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
+ int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
+ SmallVector<int, 16> TruncMask(WidenNumElts, -1);
+ for (unsigned I = 0; I < MinElts; ++I)
+ TruncMask[I] = Scale * I;
+
+ unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
+ MVT SVT = In.getSimpleValueType().getScalarType();
+ MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
+ SDValue WidenIn =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
+ DAG.getVectorIdxConstant(0, DL));
+ assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
+ "Illegal vector type in truncation");
+ WidenIn = DAG.getBitcast(WidenVT, WidenIn);
+ Results.push_back(
+ DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
+ return;
+ }
+ }
+
+ break;
+ }
}
}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
index a42c3002f46d7..480c76574715d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -5,10 +5,9 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_2i64_to_2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: st.w $a0, $a1, 4
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 8
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i32>
@@ -20,10 +19,11 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_2i64_to_2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $a1, 2
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.w $a0, $vr1, 0
+; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i16>
@@ -35,10 +35,11 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_2i64_to_2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $a1, 1
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i8>
@@ -50,14 +51,9 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_4i32_to_4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $a1, 6
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $a1, 4
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $a1, 2
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i16>
@@ -69,14 +65,11 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_4i32_to_4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $a1, 3
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $a1, 2
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $a1, 1
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i8>
@@ -88,22 +81,9 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_8i16_to_8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $a1, 7
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $a1, 6
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $a1, 5
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $a1, 4
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $a1, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $a1, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $a1, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %ptr
%trunc = trunc <8 x i16> %a to <8 x i8>
@@ -114,10 +94,11 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_2i32_to_2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: ld.w $a2, $a0, 4
-; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: st.h $a2, $a1, 2
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 8
+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i16>
@@ -128,10 +109,13 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_2i32_to_2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: ld.w $a2, $a0, 4
-; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: st.b $a2, $a1, 1
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i8>
@@ -142,14 +126,11 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_4i16_to_4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: ld.h $a2, $a0, 6
-; CHECK-NEXT: ld.h $a3, $a0, 4
-; CHECK-NEXT: ld.h $a4, $a0, 2
-; CHECK-NEXT: ld.h $a0, $a0, 0
-; CHECK-NEXT: st.b $a2, $a1, 3
-; CHECK-NEXT: st.b $a3, $a1, 2
-; CHECK-NEXT: st.b $a4, $a1, 1
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: ld.d $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %ptr
%trunc = trunc <4 x i16> %a to <4 x i8>
@@ -160,10 +141,11 @@ define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: load_trunc_2i16_to_2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: ld.h $a2, $a0, 2
-; CHECK-NEXT: ld.h $a0, $a0, 0
-; CHECK-NEXT: st.b $a2, $a1, 1
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 8
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: ret
%a = load <2 x i16>, ptr %ptr
%trunc = trunc <2 x i16> %a to <2 x i8>
More information about the llvm-commits mailing list