[llvm] [LoongArch] Optimize extractelement containing variable index for lsx (PR #156792)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 3 20:28:38 PDT 2025
https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/156792
(No description was provided for this pull request.)
>From e53308bbaecce66eea995a6127767f8059fd95ad Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Wed, 3 Sep 2025 21:03:26 +0800
Subject: [PATCH] [LoongArch] Optimize extractelement containing variable index
for lsx
---
.../LoongArch/LoongArchISelLowering.cpp | 49 ++++++++++---------
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 18 -------
.../lsx/ir-instruction/extractelement.ll | 36 ++++++--------
3 files changed, 41 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..0fffa50db7dd0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -287,7 +287,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UNDEF, VT, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Legal);
@@ -421,12 +421,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasExtLSX()) {
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BITCAST);
- }
-
- // Set DAG combine for 'LASX' feature.
-
- if (Subtarget.hasExtLASX())
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ }
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -2834,37 +2830,47 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDLoc DL(Op);
MVT GRLenVT = Subtarget.getGRLenVT();
- assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
-
if (isa<ConstantSDNode>(Idx))
return Op;
switch (VecTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Unexpected type");
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return Op;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
case MVT::v32i8:
case MVT::v16i16:
case MVT::v4i64:
case MVT::v4f64: {
- // Extract the high half subvector and place it to the low half of a new
- // vector. It doesn't matter what the high half of the new vector is.
- EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
- SDValue VecHi =
- DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
- SDValue TmpVec =
- DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
- VecHi, DAG.getConstant(0, DL, GRLenVT));
+ SDValue TmpVec;
+ if (VecTy.is256BitVector()) {
+ // Extract the high half subvector and place it to the low half of a new
+ // vector. It doesn't matter what the high half of the new vector is.
+ EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
+ SDValue VecHi = DAG.getExtractSubvector(DL, HalfTy, Vec,
+ HalfTy.getVectorNumElements());
+ TmpVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
+ VecHi, DAG.getConstant(0, DL, GRLenVT));
+ }
// Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
// of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
// desired element.
SDValue IdxCp =
DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx);
- SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
+ SDValue IdxVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL,
+ (VecTy.is128BitVector() ? MVT::v4f32 : MVT::v8f32), IdxCp);
SDValue MaskVec =
- DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
- SDValue ResVec =
- DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
+ DAG.getBitcast(VecTy.changeVectorElementTypeToInteger(), IdxVec);
+ SDValue ResVec = DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec,
+ (VecTy.is128BitVector() ? Vec : TmpVec), Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
DAG.getConstant(0, DL, GRLenVT));
@@ -6254,12 +6260,11 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
MVT EltVT = N->getSimpleValueType(0);
SDValue Vec = N->getOperand(0);
- EVT VecTy = Vec->getValueType(0);
SDValue Idx = N->getOperand(1);
unsigned IdxOp = Idx.getOpcode();
SDLoc DL(N);
- if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
+ if (isa<ConstantSDNode>(Idx))
return SDValue();
// Combine:
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index eb7120ffb41a6..deb86513859f5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2086,24 +2086,6 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
(f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;
// Vector extraction with variable index.
-def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
- i64:$rk),
- sub_32)),
- GPR), (i64 24))>;
-def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
- i64:$rk),
- sub_32)),
- GPR), (i64 16))>;
-def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
- sub_32)),
- GPR)>;
-def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
- sub_64)),
- GPR)>;
def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
(f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index c9c95f19c26f8..73fda3174a224 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -77,11 +77,9 @@ define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_16xi8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: srai.w $a0, $a0, 24
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%e = extractelement <16 x i8> %v, i32 %idx
@@ -93,11 +91,9 @@ define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_8xi16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: srai.w $a0, $a0, 16
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT: vstelm.h $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%e = extractelement <8 x i16> %v, i32 %idx
@@ -109,10 +105,9 @@ define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xi32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.w $vr1, $vr0, $vr0
+; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%e = extractelement <4 x i32> %v, i32 %idx
@@ -124,10 +119,9 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_2xi64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.d $a0, $fa0
-; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.d $vr1, $vr0, $vr0
+; CHECK-NEXT: vstelm.d $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%e = extractelement <2 x i64> %v, i32 %idx
@@ -139,8 +133,7 @@ define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xfloat_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0
+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x float>, ptr %src
@@ -153,8 +146,7 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_2xdouble_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0
+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <2 x double>, ptr %src
More information about the llvm-commits
mailing list