[llvm] [LoongArch] Custom lowering for 128-bit vector integer shifts (PR #171097)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 01:33:23 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
Changes:
After PR #169491, the DAG combiner can still recreate a vector UDIV with an illegal type even after type legalization, which is the root cause of issue #170976.
The optimization introduced in PR #169491 is still desirable, so this patch instead adds custom lowering for 128-bit vector integer shifts to prevent the DAG from producing shift nodes with illegal types.
Fixes #170976
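For illustration only (these are not the reproducer from issue #170976 nor taken from the PR's test files), the 128-bit vector shifts covered by the new lowering look roughly like the LLVM IR below; constant-splat shift amounts are expected to select the immediate forms (e.g. vslli.w), while variable amounts select the register forms (e.g. vsra.d):

```llvm
; Hypothetical examples: a constant-splat left shift (immediate form)
; and a variable arithmetic right shift (register form).
define <4 x i32> @shl_splat_imm(<4 x i32> %v) {
  %r = shl <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %r
}

define <2 x i64> @sra_var(<2 x i64> %v, <2 x i64> %amt) {
  %r = ashr <2 x i64> %v, %amt
  ret <2 x i64> %r
}
```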
---
Patch is 28.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171097.diff
8 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+62-1)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+28-16)
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+59-51)
- (modified) llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll (+6-2)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll (+18-36)
- (added) llvm/test/CodeGen/LoongArch/lsx/issue170976.ll (+74)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll (+13)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32ea2198f7898..2356f551c119e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -338,7 +338,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
VT, Legal);
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
- setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Custom);
setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
setCondCodeAction(
@@ -618,10 +618,71 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerVECREDUCE(Op, DAG);
case ISD::ConstantFP:
return lowerConstantFP(Op, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return lowerVectorSRA_SRL_SHL(Op, DAG);
}
return SDValue();
}
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN ||
+ !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+SDValue
+LoongArchTargetLowering::lowerVectorSRA_SRL_SHL(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ int64_t Cnt;
+
+ if (!Op.getOperand(1).getValueType().isVector())
+ return Op;
+ unsigned EltSize = VT.getScalarSizeInBits();
+ MVT GRLenVT = Subtarget.getGRLenVT();
+
+ switch (Op.getOpcode()) {
+ case ISD::SHL:
+ if (getVShiftImm(Op.getOperand(1), EltSize, Cnt) && Cnt >= 0 &&
+ Cnt < EltSize)
+ return DAG.getNode(LoongArchISD::VSLLI, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, GRLenVT));
+ return DAG.getNode(LoongArchISD::VSLL, DL, VT, Op.getOperand(0),
+ Op.getOperand(1));
+ case ISD::SRA:
+ case ISD::SRL:
+ if (getVShiftImm(Op.getOperand(1), EltSize, Cnt) && Cnt >= 0 &&
+ Cnt < EltSize) {
+ unsigned Opc = (Op.getOpcode() == ISD::SRA) ? LoongArchISD::VSRAI
+ : LoongArchISD::VSRLI;
+ return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, GRLenVT));
+ }
+ unsigned Opc =
+ (Op.getOpcode() == ISD::SRA) ? LoongArchISD::VSRA : LoongArchISD::VSRL;
+ return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1));
+ }
+
+ llvm_unreachable("unexpected shift opcode");
+}
+
// Helper to attempt to return a cheaper, bit-inverted version of \p V.
static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
// TODO: don't always ignore oneuse constraints.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 5277e7e3e74ca..6ad14ea9d6951 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -240,6 +240,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index d6af093411c3a..6bb74e76fabc6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1332,6 +1332,18 @@ multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
}
}
+multiclass XVAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
+multiclass XVAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+ (vt (vsplat_imm_eq_1)))),
+ (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
let Predicates = [HasExtLASX] in {
// XVADD_{B/H/W/D}
@@ -2041,22 +2053,22 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
sub_128)>;
// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
-defm : VAvgPat<sra, "XVAVG_B", v32i8>;
-defm : VAvgPat<sra, "XVAVG_H", v16i16>;
-defm : VAvgPat<sra, "XVAVG_W", v8i32>;
-defm : VAvgPat<sra, "XVAVG_D", v4i64>;
-defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
-defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
-defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
-defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
-defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
-defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
-defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
-defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
-defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
-defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
-defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
-defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
+defm : XVAvgPat<sra, "XVAVG_B", v32i8>;
+defm : XVAvgPat<sra, "XVAVG_H", v16i16>;
+defm : XVAvgPat<sra, "XVAVG_W", v8i32>;
+defm : XVAvgPat<sra, "XVAVG_D", v4i64>;
+defm : XVAvgPat<srl, "XVAVG_BU", v32i8>;
+defm : XVAvgPat<srl, "XVAVG_HU", v16i16>;
+defm : XVAvgPat<srl, "XVAVG_WU", v8i32>;
+defm : XVAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : XVAvgrPat<sra, "XVAVGR_B", v32i8>;
+defm : XVAvgrPat<sra, "XVAVGR_H", v16i16>;
+defm : XVAvgrPat<sra, "XVAVGR_W", v8i32>;
+defm : XVAvgrPat<sra, "XVAVGR_D", v4i64>;
+defm : XVAvgrPat<srl, "XVAVGR_BU", v32i8>;
+defm : XVAvgrPat<srl, "XVAVGR_HU", v16i16>;
+defm : XVAvgrPat<srl, "XVAVGR_WU", v8i32>;
+defm : XVAvgrPat<srl, "XVAVGR_DU", v4i64>;
// abs
def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 43ad3819029cf..933975a05878b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -72,10 +72,20 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplg
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
+// Vector logicial left / right shift
+def loongarch_vsll : SDNode<"LoongArchISD::VSLL", SDT_LoongArchV2R>;
+def loongarch_vsrl : SDNode<"LoongArchISD::VSRL", SDT_LoongArchV2R>;
+
+// Vector arithmetic right shift
+def loongarch_vsra : SDNode<"LoongArchISD::VSRA", SDT_LoongArchV2R>;
+
// Vector logicial left / right shift by immediate
def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>;
def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
+// Vector arithmetic right shift by immediate
+def loongarch_vsrai : SDNode<"LoongArchISD::VSRAI", SDT_LoongArchV1RUimm>;
+
// Vector byte logicial left / right shift
def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>;
def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>;
@@ -1531,14 +1541,14 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
}
multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
- def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))),
+ def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (GRLenVT 1)),
(!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
}
multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
(vt (vsplat_imm_eq_1)))),
- (vt (vsplat_imm_eq_1))),
+ (GRLenVT 1)),
(!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
}
@@ -1641,21 +1651,19 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32,
(VBSRL_V LSX128:$vj, uimm5:$imm)>;
// VSLL[I]_{B/H/W/D}
-defm : PatVrVr<shl, "VSLL">;
-defm : PatShiftVrVr<shl, "VSLL">;
-defm : PatShiftVrSplatUimm<shl, "VSLLI">;
+defm : PatVrVr<loongarch_vsll, "VSLL">;
+defm : PatShiftVrVr<loongarch_vsll, "VSLL">;
defm : PatShiftVrUimm<loongarch_vslli, "VSLLI">;
// VSRL[I]_{B/H/W/D}
-defm : PatVrVr<srl, "VSRL">;
-defm : PatShiftVrVr<srl, "VSRL">;
-defm : PatShiftVrSplatUimm<srl, "VSRLI">;
+defm : PatVrVr<loongarch_vsrl, "VSRL">;
+defm : PatShiftVrVr<loongarch_vsrl, "VSRL">;
defm : PatShiftVrUimm<loongarch_vsrli, "VSRLI">;
// VSRA[I]_{B/H/W/D}
-defm : PatVrVr<sra, "VSRA">;
-defm : PatShiftVrVr<sra, "VSRA">;
-defm : PatShiftVrSplatUimm<sra, "VSRAI">;
+defm : PatVrVr<loongarch_vsra, "VSRA">;
+defm : PatShiftVrVr<loongarch_vsra, "VSRA">;
+defm : PatShiftVrUimm<loongarch_vsrai, "VSRAI">;
// VROTR[I]_{B/H/W/D}
defm : PatVrVr<rotr, "VROTR">;
@@ -1669,24 +1677,24 @@ defm : PatVr<ctlz, "VCLZ">;
defm : PatVr<ctpop, "VPCNT">;
// VBITCLR_{B/H/W/D}
-def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj),
+def : Pat<(loongarch_vandn (v16i8 (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj),
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj),
+def : Pat<(loongarch_vandn (v8i16 (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj),
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj),
+def : Pat<(loongarch_vandn (v4i32 (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj),
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj),
+def : Pat<(loongarch_vandn (v2i64 (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj),
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
-def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1,
+def : Pat<(loongarch_vandn (v16i8 (loongarch_vsll vsplat_imm_eq_1,
(vsplati8imm7 v16i8:$vk))), v16i8:$vj),
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1,
+def : Pat<(loongarch_vandn (v8i16 (loongarch_vsll vsplat_imm_eq_1,
(vsplati16imm15 v8i16:$vk))), v8i16:$vj),
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1,
+def : Pat<(loongarch_vandn (v4i32 (loongarch_vsll vsplat_imm_eq_1,
(vsplati32imm31 v4i32:$vk))), v4i32:$vj),
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1,
+def : Pat<(loongarch_vandn (v2i64 (loongarch_vsll vsplat_imm_eq_1,
(vsplati64imm63 v2i64:$vk))), v2i64:$vj),
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
@@ -1701,21 +1709,21 @@ def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
(VBITCLRI_D LSX128:$vj, uimm6:$imm)>;
// VBITSET_{B/H/W/D}
-def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+def : Pat<(or v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)),
(v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+def : Pat<(or v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)),
(v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+def : Pat<(or v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)),
(v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
+def : Pat<(or v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)),
(v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>;
-def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
+def : Pat<(or v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
(v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
+def : Pat<(or v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
(v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
+def : Pat<(or v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
(v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
+def : Pat<(or v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
(v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>;
// VBITSETI_{B/H/W/D}
@@ -1729,21 +1737,21 @@ def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
(VBITSETI_D LSX128:$vj, uimm6:$imm)>;
// VBITREV_{B/H/W/D}
-def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+def : Pat<(xor v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, v16i8:$vk)),
(v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+def : Pat<(xor v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, v8i16:$vk)),
(v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+def : Pat<(xor v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, v4i32:$vk)),
(v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
+def : Pat<(xor v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, v2i64:$vk)),
(v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>;
-def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
+def : Pat<(xor v16i8:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
(v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
+def : Pat<(xor v8i16:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
(v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
+def : Pat<(xor v4i32:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
(v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
+def : Pat<(xor v2i64:$vj, (loongarch_vsll vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
(v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>;
// VBITREVI_{B/H/W/D}
@@ -2191,22 +2199,22 @@ def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
-defm : VAvgPat<sra, "VAVG_B", v16i8>;
-defm : VAvgPat<sra, "VAVG_H", v8i16>;
-defm : VAvgPat<sra, "VAVG_W", v4i32>;
-defm : VAvgPat<sra, "VAVG_D", v2i64>;
-defm : VAvgPat<srl, "VAVG_BU", v16i8>;
-defm : VAvgPat<srl, "VAVG_HU", v8i16>;
-defm : VAvgPat<srl, "VAVG_WU", v4i32>;
-defm : VAvgPat<srl, "VAVG_DU", v2i64>;
-defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
-defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
-defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
-defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
-defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
-defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
-defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
-defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
+defm : VAvgPat<loongarch_vsrai, "VAVG_B", v16i8>;
+defm : VAvgPat<loongarch_vsrai, "VAVG_H", v8i16>;
+defm : VAvgPat<loongarch_vsrai, "VAVG_W", v4i32>;
+defm : VAvgPat<loongarch_vsrai, "VAVG_D", v2i64>;
+defm : VAvgPat<loongarch_vsrli, "VAVG_BU", v16i8>;
+defm : VAvgPat<loongarch_vsrli, "VAVG_HU", v8i16>;
+defm : VAvgPat<loongarch_vsrli, "VAVG_WU", v4i32>;
+defm : VAvgPat<loongarch_vsrli, "VAVG_DU", v2i64>;
+defm : VAvgrPat<loongarch_vsrai, "VAVGR_B", v16i8>;
+defm : VAvgrPat<loongarch_vsrai, "VAVGR_H", v8i16>;
+defm : VAvgrPat<loongarch_vsrai, "VAVGR_W", v4i32>;
+defm : VAvgrPat<loongarch_vsrai, "VAVGR_D", v2i64>;
+defm : VAvgrPat<loongarch_vsrli, "VAVGR_BU", v16i8>;
+defm : VAvgrPat<loongarch_vsrli, "VAVGR_HU", v8i16>;
+defm : VAvgrPat<loongarch_vsrli, "VAVGR_WU", v4i32>;
+defm : VAvgrPat<loongarch_vsrli, "VAVGR_DU", v2i64>;
// abs
def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
index b0d36a8143fa1..b043e90d302a6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
@@ -9,8 +9,10 @@ declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
; LA32-LABEL: test_bitreverse_v16i8:
; LA32: # %bb.0:
-; LA32-NEXT: vslli.b $vr1, $vr0, 4
+; LA32-NEXT: vandi.b $vr1, $vr0, 15
+; LA32-NEXT: vslli.b $vr1, $vr1, 4
; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vandi.b $vr0, $vr0, 15
; LA32-NEXT: vor.v $vr0, $vr0, $vr1
; LA32-NEXT: vandi.b $vr1, $vr0, 51
; LA32-NEXT: vslli.b $vr1, $vr1, 2
@@ -114,8 +116,10 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0)
; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
-; LA32-NEXT: vslli.b $vr1, $vr0, 4
+; LA32-NEXT: vandi.b $vr1, $vr0, 15
+; LA32-NEXT: vslli.b $vr1, $vr1, 4
; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vandi.b $vr0, $vr0, 15
; LA32-NEXT: vor.v $vr0, $vr0, $vr1
; LA32-NEXT: vandi.b $vr1, $vr0, 51
; LA32-NEXT: vslli.b $vr1, $vr1, 2
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
index 334af22edee59..1de393965c7a0 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
@@ -54,22 +54,13 @@ entry:
}
define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vavg_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT: vsrai.d $vr0, $vr0, 1
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vavg_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vld $vr1, $a2, 0
-; LA64-NEXT: vavg.d $vr0, $vr0, $vr1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vavg_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -131,22 +122,13 @@ entry:
}
define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vavg_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT: vsrli.d $vr0, $vr0, 1
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vavg_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vld $vr1, $a2, 0
-; LA64-NEXT: vavg.du $vr0, $vr0, $vr1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-N...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/171097
More information about the llvm-commits mailing list