[llvm-branch-commits] [llvm] [LoongArch] Custom legalize vector_shuffle whose elements come from halves or quarters (PR #165670)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 30 00:57:07 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: ZhaoQi (zhaoqi5)
Changes
---
Full diff: https://github.com/llvm/llvm-project/pull/165670.diff
3 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+102)
- (modified) llvm/test/CodeGen/LoongArch/lsx/shufflevector-halves-quarters.ll (+34-32)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll (+29-29)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..8564fb1fe5560 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1738,6 +1738,105 @@ lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
}
+/// Lower VECTOR_SHUFFLE whose result elements are all undef except for the
+/// first two or four elements, which come from the half or quarter parts of
+/// the source vector.
+///
+/// It is possible to optimize a VECTOR_SHUFFLE whose mask looks like:
+/// <i, i+n/2, -1, ...>
+/// where n is the number of elements in the vector and i is in [0, n/2). Or:
+/// <i, i+4, i+8, i+12, -1, ...> (only v16i8; the first four can be undef)
+/// where i is in [0, 4).
+///
+/// For example: <0, 4, -1, ...> or <0, 4, 8, 12, -1, ...>, which appear when
+/// legalizing ISD::TRUNCATE in ReplaceNodeResults().
+static SDValue
+lowerVECTOR_SHUFFLE_HalvesOrQuarters(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ if (VT != MVT::v16i8 && VT != MVT::v8i16)
+ return SDValue();
+
+ int HalfSize = Mask.size() / 2;
+ int QuarterSize = Mask.size() / 4;
+ MVT GRLenVT = Subtarget.getGRLenVT();
+
+ auto allUndefFrom = [&](unsigned idx) -> bool {
+ return llvm::all_of(Mask.drop_front(idx), [](int M) { return M == -1; });
+ };
+
+ auto buildShuffled = [&](MVT CastVT, ArrayRef<int> ShuffleMask) {
+ SDValue Cast = DAG.getBitcast(CastVT, V1);
+ SDValue Shuf = DAG.getVectorShuffle(CastVT, DL, Cast, Cast, ShuffleMask);
+ return DAG.getBitcast(VT, Shuf);
+ };
+
+ // Check pattern: <i, i+HalfSize, -1, ...>
+ int M0 = Mask[0], M1 = Mask[1];
+ if (M0 >= 0 && M0 < HalfSize && M1 == M0 + HalfSize && allUndefFrom(2)) {
+ SDValue SrcVec = V1;
+ // Pre-shuffle the vector to place the needed elements at the front.
+ if (M0 >= QuarterSize && M0 < QuarterSize + 2)
+ SrcVec = buildShuffled(MVT::v4i32, {1, 0, 3, 2});
+ else if (M0 >= 2 && M0 < 4) // Only v16i8 meets this.
+ SrcVec = buildShuffled(MVT::v8i16, {1, 0, 3, 2, 5, 4, 7, 6});
+ else if (M0 >= 6 && M0 < 8) // Only v16i8 meets this.
+ SrcVec = buildShuffled(MVT::v8i16, {3, 2, 1, 0, 7, 6, 5, 4});
+
+ // Broadcast the needed high part elements.
+ SDValue VecHi = DAG.getNode(LoongArchISD::VREPLVEI, DL, MVT::v4i32,
+ DAG.getBitcast(MVT::v4i32, SrcVec),
+ DAG.getConstant(2, DL, GRLenVT));
+
+ unsigned Opc = (M0 % 2) ? LoongArchISD::VPACKOD : LoongArchISD::VPACKEV;
+ return DAG.getNode(Opc, DL, VT, DAG.getBitcast(VT, VecHi), SrcVec);
+ }
+
+ // Only consider quarter cases for v16i8.
+ if (VT != MVT::v16i8)
+ return SDValue();
+
+ // Check pattern: <i, i+4, i+8, i+12, -1, ...>
+ // This still succeeds even if some of the first four elements are undef.
+ bool FromQuarters = false;
+ int First = -1;
+ for (int i = 0; i < QuarterSize && !FromQuarters; ++i) {
+ FromQuarters = llvm::all_of(llvm::seq<int>(0, 4), [&](int j) {
+ return Mask[j] == -1 || Mask[j] == i + j * 4;
+ });
+ if (FromQuarters)
+ First = i;
+ }
+
+ if (FromQuarters && allUndefFrom(4)) {
+ SmallVector<int, 8> ShufMask =
+ (First < 2) ? SmallVector<int, 8>{0, 2, 1, 3, 4, 6, 5, 7}
+ : SmallVector<int, 8>{1, 3, 0, 2, 5, 7, 4, 6};
+ SmallVector<int, 16> ExtractMask =
+ (First % 2) ? SmallVector<int, 16>{1, 3, 0, 2, 5, 7, 4, 6,
+ 9, 11, 8, 10, 13, 15, 12, 14}
+ : SmallVector<int, 16>{0, 2, 1, 3, 4, 6, 5, 7,
+ 8, 10, 9, 11, 12, 14, 13, 15};
+
+ // Pre-shuffle the vector to place the needed elements at the front.
+ MVT ShufVT = MVT::v8i16;
+ SDValue SrcVec = buildShuffled(ShufVT, ShufMask);
+ SDValue Extract = DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, ExtractMask);
+
+ // Broadcast the needed high part elements.
+ SDValue VecHi = DAG.getNode(LoongArchISD::VREPLVEI, DL, ShufVT,
+ DAG.getBitcast(ShufVT, Extract),
+ DAG.getConstant(4, DL, GRLenVT));
+
+ unsigned Opc = (First % 2) ? LoongArchISD::VPACKOD : LoongArchISD::VPACKEV;
+ SDValue Result =
+ DAG.getNode(Opc, DL, ShufVT, VecHi, DAG.getBitcast(ShufVT, Extract));
+ return DAG.getBitcast(VT, Result);
+ }
+
+ return SDValue();
+}
+
/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
@@ -2044,6 +2143,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result =
lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
+ if ((Result = lowerVECTOR_SHUFFLE_HalvesOrQuarters(DL, Mask, VT, V1, DAG,
+ Subtarget)))
+ return Result;
// TODO: This comment may be enabled in the future to better match the
// pattern for instruction selection.
diff --git a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-halves-quarters.ll b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-halves-quarters.ll
index 2a0a107a2b76e..946a4e5524bc0 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-halves-quarters.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-halves-quarters.ll
@@ -6,9 +6,8 @@ define void @shufflevector_halves_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_halves_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vpackev.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -23,9 +22,9 @@ define void @shufflevector_halves_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_halves_b_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 177
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vpackod.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -40,9 +39,9 @@ define void @shufflevector_halves_b_2(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_halves_b_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 177
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vpackod.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,9 +56,9 @@ define void @shufflevector_halves_b_3(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_halves_b_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 27
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vpackev.b $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -74,10 +73,9 @@ define void @shufflevector_halves_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_halves_h:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vst $vr1, $a0, 0
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vpackev.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i16>, ptr %a
@@ -91,10 +89,10 @@ define void @shufflevector_halves_h_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_halves_h_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vst $vr1, $a0, 0
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 177
+; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
+; CHECK-NEXT: vpackod.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i16>, ptr %a
@@ -108,9 +106,10 @@ define void @shufflevector_quarters_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_quarters_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 216
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 216
+; CHECK-NEXT: vreplvei.h $vr1, $vr0, 4
+; CHECK-NEXT: vpackev.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -125,9 +124,10 @@ define void @shufflevector_quarters_b_1(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_quarters_b_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 216
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 141
+; CHECK-NEXT: vreplvei.h $vr1, $vr0, 4
+; CHECK-NEXT: vpackod.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -142,9 +142,10 @@ define void @shufflevector_quarters_b_2(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_quarters_b_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 141
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 216
+; CHECK-NEXT: vreplvei.h $vr1, $vr0, 4
+; CHECK-NEXT: vpackev.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -159,9 +160,10 @@ define void @shufflevector_quarters_b_3(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_quarters_b_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 141
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 141
+; CHECK-NEXT: vreplvei.h $vr1, $vr0, 4
+; CHECK-NEXT: vpackod.h $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
index 314350acd23d6..9b9016b4e5972 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -28,20 +28,18 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
; LA32-LABEL: load_trunc_2i64_to_2i16:
; LA32: # %bb.0:
; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; LA32-NEXT: vshuf.h $vr1, $vr0, $vr0
-; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0
+; LA32-NEXT: vreplvei.w $vr1, $vr0, 2
+; LA32-NEXT: vpackev.h $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32-NEXT: st.w $a0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_trunc_2i64_to_2i16:
; LA64: # %bb.0:
; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA64-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; LA64-NEXT: vshuf.h $vr1, $vr0, $vr0
-; LA64-NEXT: vstelm.w $vr1, $a1, 0, 0
+; LA64-NEXT: vreplvei.w $vr1, $vr0, 2
+; LA64-NEXT: vpackev.h $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
; LA64-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i16>
@@ -53,18 +51,16 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
; LA32-LABEL: load_trunc_2i64_to_2i8:
; LA32: # %bb.0:
; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA32-NEXT: vreplvei.w $vr1, $vr0, 2
+; LA32-NEXT: vpackev.b $vr0, $vr1, $vr0
; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_trunc_2i64_to_2i8:
; LA64: # %bb.0:
; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA64-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; LA64-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA64-NEXT: vreplvei.w $vr1, $vr0, 2
+; LA64-NEXT: vpackev.b $vr0, $vr1, $vr0
; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
; LA64-NEXT: ret
%a = load <2 x i64>, ptr %ptr
@@ -100,9 +96,10 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
; LA32-LABEL: load_trunc_4i32_to_4i8:
; LA32: # %bb.0:
; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
-; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA32-NEXT: vshuf4i.h $vr0, $vr0, 216
+; LA32-NEXT: vshuf4i.b $vr0, $vr0, 216
+; LA32-NEXT: vreplvei.h $vr1, $vr0, 4
+; LA32-NEXT: vpackev.h $vr0, $vr1, $vr0
; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32-NEXT: st.w $a0, $a1, 0
; LA32-NEXT: ret
@@ -110,9 +107,10 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
; LA64-LABEL: load_trunc_4i32_to_4i8:
; LA64: # %bb.0:
; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; LA64-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
-; LA64-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA64-NEXT: vshuf4i.h $vr0, $vr0, 216
+; LA64-NEXT: vshuf4i.b $vr0, $vr0, 216
+; LA64-NEXT: vreplvei.h $vr1, $vr0, 4
+; LA64-NEXT: vpackev.h $vr0, $vr1, $vr0
; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
; LA64-NEXT: ret
%a = load <4 x i32>, ptr %ptr
@@ -174,21 +172,23 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI7_0)
-; LA32-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI7_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vshuf4i.h $vr0, $vr0, 216
+; LA32-NEXT: vshuf4i.b $vr0, $vr0, 216
+; LA32-NEXT: vreplvei.h $vr1, $vr0, 4
+; LA32-NEXT: vpackev.h $vr0, $vr1, $vr0
; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_trunc_2i32_to_2i8:
; LA64: # %bb.0:
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
-; LA64-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vshuf4i.h $vr0, $vr0, 216
+; LA64-NEXT: vshuf4i.b $vr0, $vr0, 216
+; LA64-NEXT: vreplvei.h $vr1, $vr0, 4
+; LA64-NEXT: vpackev.h $vr0, $vr1, $vr0
; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
; LA64-NEXT: ret
%a = load <2 x i32>, ptr %ptr
``````````
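For reference, here is a minimal IR sketch of the two mask shapes this lowering targets (the function names and exact mask values are illustrative, modeled on the tests above):

```llvm
; Halves pattern on <8 x i16>: mask <0, 4, -1, ...> picks elements i and i+n/2.
define void @halves_example(ptr %res, ptr %a) nounwind {
  %v = load <8 x i16>, ptr %a
  %s = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i16> %s, ptr %res
  ret void
}

; Quarters pattern on <16 x i8>: mask <0, 4, 8, 12, -1, ...> picks one element
; from each quarter of the vector.
define void @quarters_example(ptr %res, ptr %a) nounwind {
  %v = load <16 x i8>, ptr %a
  %s = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <16 x i8> %s, ptr %res
  ret void
}
```

With this change, shuffles of these shapes (e.g. compiled with `llc --mtriple=loongarch64 --mattr=+lsx`) should lower to the vreplvei/vpackev or vshuf4i sequences shown in the updated CHECK lines, instead of a constant-pool load feeding vshuf.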
https://github.com/llvm/llvm-project/pull/165670
More information about the llvm-branch-commits mailing list