[llvm] 47c88bc - [LoongArch] Fix LASX vector_extract codegen
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 28 21:51:24 PST 2023
Author: wanglei
Date: 2023-12-29T13:48:53+08:00
New Revision: 47c88bcd5de91522241cca1aaa1b7762ceb01394
URL: https://github.com/llvm/llvm-project/commit/47c88bcd5de91522241cca1aaa1b7762ceb01394
DIFF: https://github.com/llvm/llvm-project/commit/47c88bcd5de91522241cca1aaa1b7762ceb01394.diff
LOG: [LoongArch] Fix LASX vector_extract codegen
Custom-lower `ISD::EXTRACT_VECTOR_ELT` for LASX vectors.
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4794a131edae05..80853ee3198771 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -286,7 +286,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UNDEF, VT, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Legal);
@@ -406,6 +406,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerWRITE_REGISTER(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
@@ -513,6 +515,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
+SDValue
+LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VecTy = Op->getOperand(0)->getValueType(0);
+ SDValue Idx = Op->getOperand(1);
+ EVT EltTy = VecTy.getVectorElementType();
+ unsigned NumElts = VecTy.getVectorNumElements();
+
+ if (isa<ConstantSDNode>(Idx) &&
+ (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
+ EltTy == MVT::f64 ||
+ cast<ConstantSDNode>(Idx)->getZExtValue() < NumElts / 2))
+ return Op;
+
+ return SDValue();
+}
+
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 2d73a73949461b..6f8878f9ccd519 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -279,6 +279,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index ec6983d0f4871f..9b7a346888112e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1590,38 +1590,14 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
(VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
-def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)),
- (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>;
-def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)),
- (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>;
-def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)),
- (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>;
-def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)),
- (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>;
-
-// Vector extraction with variable index.
-def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj,
- i64:$rk),
- sub_32)),
- GPR), (i64 24))>;
-def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj,
- i64:$rk),
- sub_32)),
- GPR), (i64 16))>;
-def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)),
- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk),
- sub_32)),
- GPR)>;
-def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)),
- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk),
- sub_64)),
- GPR)>;
-def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)),
- (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>;
-def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)),
- (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>;
+def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
+ (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
+ (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
+def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)),
+ (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>;
+def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)),
+ (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>;
// vselect
def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
index 78f584cd09a8fb..fc2929d8e6db33 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
@@ -31,7 +31,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
@@ -44,7 +44,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xi64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
@@ -57,8 +57,8 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: ori $a0, $zero, 7
-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0
+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
+; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
@@ -71,8 +71,8 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: ori $a0, $zero, 3
-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0
+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
@@ -84,12 +84,21 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_32xi8_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: srai.w $a0, $a0, 24
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0
+; CHECK-NEXT: ld.b $a0, $a0, 0
; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 %idx
@@ -100,12 +109,21 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_16xi16_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: srai.w $a0, $a0, 16
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1
+; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 %idx
@@ -116,11 +134,21 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_8xi32_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
+; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 %idx
@@ -131,11 +159,21 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xi64_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2
-; CHECK-NEXT: movfr2gr.d $a0, $fa0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
+; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 %idx
@@ -146,10 +184,21 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_8xfloat_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
+; CHECK-NEXT: fld.s $fa0, $a0, 0
; CHECK-NEXT: fst.s $fa0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%e = extractelement <8 x float> %v, i32 %idx
@@ -160,10 +209,21 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xdouble_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
+; CHECK-NEXT: fld.d $fa0, $a0, 0
; CHECK-NEXT: fst.d $fa0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%e = extractelement <4 x double> %v, i32 %idx
More information about the llvm-commits
mailing list