[llvm] a108881 - [LoongArch] Custom lowering for vector logical right shifts of integers (#171097)
Author: hev
Date: 2025-12-10T16:39:29+08:00
New Revision: a108881b24ecfea8d194b33dd9fb211943065bca
URL: https://github.com/llvm/llvm-project/commit/a108881b24ecfea8d194b33dd9fb211943065bca
DIFF: https://github.com/llvm/llvm-project/commit/a108881b24ecfea8d194b33dd9fb211943065bca.diff
LOG: [LoongArch] Custom lowering for vector logical right shifts of integers (#171097)
After PR #169491, the DAG combiner can still recreate a vector UDIV node
with an illegal type even after type legalization, which is the root
cause of issue #170976.
The optimization introduced in PR #169491 is still desirable, so this
patch adds custom lowering for vector integer logical right shifts to
prevent the DAG from producing nodes with illegal types.
Fixes #170976
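For context, the failure mode boils down to an unsigned division by a
splat constant in a vector type that is illegal for LSX/LASX and must be
split, as exercised by the new regression tests in this patch. A minimal
reproducer in that spirit (the function name is illustrative):

define <32 x i8> @udiv_by_3(<32 x i8> %x) {
entry:
  ; <32 x i8> is illegal for 128-bit LSX and is split during type
  ; legalization; the combiner could afterwards reintroduce a UDIV node
  ; with the illegal type.
  %div = udiv <32 x i8> %x, splat (i8 3)
  ret <32 x i8> %div
}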
Added:
llvm/test/CodeGen/LoongArch/lasx/issue170976.ll
llvm/test/CodeGen/LoongArch/lsx/issue170976.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32ea2198f7898..4d232028133db 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -338,7 +338,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
VT, Legal);
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
- setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+ setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal);
setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
setCondCodeAction(
@@ -354,6 +354,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -427,7 +428,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
VT, Legal);
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
- setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+ setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal);
setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
setCondCodeAction(
@@ -444,6 +445,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
}
for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -618,10 +620,51 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerVECREDUCE(Op, DAG);
case ISD::ConstantFP:
return lowerConstantFP(Op, DAG);
+ case ISD::SRL:
+ return lowerVectorSRL(Op, DAG);
}
return SDValue();
}
+/// getVShiftAmt - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftAmt(SDValue Op, unsigned ElementBits, int64_t &Amt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN ||
+ !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Amt = SplatBits.getSExtValue();
+ return true;
+}
+
+SDValue LoongArchTargetLowering::lowerVectorSRL(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ int64_t Amt;
+
+ if (!Op.getOperand(1).getValueType().isVector())
+ return Op;
+ unsigned EltSize = VT.getScalarSizeInBits();
+ MVT GRLenVT = Subtarget.getGRLenVT();
+
+ assert(Op.getOpcode() == ISD::SRL && "unexpected shift opcode");
+ if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && Amt < EltSize)
+ return DAG.getNode(LoongArchISD::VSRLI, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Amt, DL, GRLenVT));
+ return DAG.getNode(LoongArchISD::VSRL, DL, VT, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
// Helper to attempt to return a cheaper, bit-inverted version of \p V.
static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
// TODO: don't always ignore oneuse constraints.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 5277e7e3e74ca..84622c30c0999 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -240,6 +240,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorSRL(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index d6af093411c3a..5896ca3f5a980 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1437,9 +1437,8 @@ defm : PatShiftXrSplatUimm<shl, "XVSLLI">;
defm : PatShiftXrUimm<loongarch_vslli, "XVSLLI">;
// XVSRL[I]_{B/H/W/D}
-defm : PatXrXr<srl, "XVSRL">;
-defm : PatShiftXrXr<srl, "XVSRL">;
-defm : PatShiftXrSplatUimm<srl, "XVSRLI">;
+defm : PatXrXr<loongarch_vsrl, "XVSRL">;
+defm : PatShiftXrXr<loongarch_vsrl, "XVSRL">;
defm : PatShiftXrUimm<loongarch_vsrli, "XVSRLI">;
// XVSRA[I]_{B/H/W/D}
@@ -2045,18 +2044,18 @@ defm : VAvgPat<sra, "XVAVG_B", v32i8>;
defm : VAvgPat<sra, "XVAVG_H", v16i16>;
defm : VAvgPat<sra, "XVAVG_W", v8i32>;
defm : VAvgPat<sra, "XVAVG_D", v4i64>;
-defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
-defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
-defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
-defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_BU", v32i8>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_HU", v16i16>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_WU", v8i32>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_DU", v4i64>;
defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
-defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
-defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
-defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
-defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_BU", v32i8>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_HU", v16i16>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_WU", v8i32>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_DU", v4i64>;
// abs
def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 43ad3819029cf..96bf8a2db835d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -72,6 +72,9 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplg
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
+// Vector logical right shift
+def loongarch_vsrl : SDNode<"LoongArchISD::VSRL", SDT_LoongArchV2R>;
+
// Vector logical left / right shift by immediate
def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>;
def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
@@ -1535,6 +1538,11 @@ multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
(!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
}
+multiclass VAvgIPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (GRLenVT 1)),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
(vt (vsplat_imm_eq_1)))),
@@ -1542,6 +1550,13 @@ multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
(!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
}
+multiclass VAvgrIPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+ (vt (vsplat_imm_eq_1)))),
+ (GRLenVT 1)),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -1647,9 +1662,8 @@ defm : PatShiftVrSplatUimm<shl, "VSLLI">;
defm : PatShiftVrUimm<loongarch_vslli, "VSLLI">;
// VSRL[I]_{B/H/W/D}
-defm : PatVrVr<srl, "VSRL">;
-defm : PatShiftVrVr<srl, "VSRL">;
-defm : PatShiftVrSplatUimm<srl, "VSRLI">;
+defm : PatVrVr<loongarch_vsrl, "VSRL">;
+defm : PatShiftVrVr<loongarch_vsrl, "VSRL">;
defm : PatShiftVrUimm<loongarch_vsrli, "VSRLI">;
// VSRA[I]_{B/H/W/D}
@@ -2195,18 +2209,18 @@ defm : VAvgPat<sra, "VAVG_B", v16i8>;
defm : VAvgPat<sra, "VAVG_H", v8i16>;
defm : VAvgPat<sra, "VAVG_W", v4i32>;
defm : VAvgPat<sra, "VAVG_D", v2i64>;
-defm : VAvgPat<srl, "VAVG_BU", v16i8>;
-defm : VAvgPat<srl, "VAVG_HU", v8i16>;
-defm : VAvgPat<srl, "VAVG_WU", v4i32>;
-defm : VAvgPat<srl, "VAVG_DU", v2i64>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_BU", v16i8>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_HU", v8i16>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_WU", v4i32>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_DU", v2i64>;
defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
-defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
-defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
-defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
-defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_BU", v16i8>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_HU", v8i16>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_WU", v4i32>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_DU", v2i64>;
// abs
def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
index 8b12216d0f856..7f663d8de3cb8 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
@@ -11,6 +11,7 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: xvslli.b $xr1, $xr0, 4
; LA32-NEXT: xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT: xvandi.b $xr0, $xr0, 15
; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
; LA32-NEXT: xvandi.b $xr1, $xr0, 51
; LA32-NEXT: xvslli.b $xr1, $xr1, 2
@@ -163,6 +164,7 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
; LA32-NEXT: xvslli.b $xr1, $xr0, 4
; LA32-NEXT: xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT: xvandi.b $xr0, $xr0, 15
; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
; LA32-NEXT: xvandi.b $xr1, $xr0, 51
; LA32-NEXT: xvslli.b $xr1, $xr1, 2
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
index 5c5c19935080b..0577a116bee5a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -131,22 +131,13 @@ entry:
}
define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvavg_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
-; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvavg_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -298,8 +289,8 @@ define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
; LA32-NEXT: xvld $xr0, $a1, 0
; LA32-NEXT: xvld $xr1, $a2, 0
; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
-; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
-; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvrepli.d $xr1, 1
+; LA32-NEXT: xvavg.du $xr0, $xr0, $xr1
; LA32-NEXT: xvst $xr0, $a0, 0
; LA32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll b/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll
new file mode 100644
index 0000000000000..9b17d7b8c9767
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/issue170976.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define <64 x i8> @test_i8(<64 x i8> %shuffle) {
+; CHECK-LABEL: test_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.b $xr2, -85
+; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr2
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvmuh.bu $xr1, $xr1, $xr2
+; CHECK-NEXT: xvsrli.b $xr1, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %div = udiv <64 x i8> %shuffle, splat (i8 3)
+ ret <64 x i8> %div
+}
+
+define <32 x i16> @test_i16(<32 x i16> %shuffle) {
+; CHECK-LABEL: test_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a0, 10
+; CHECK-NEXT: ori $a0, $a0, 2731
+; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0
+; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr2
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvmuh.hu $xr1, $xr1, $xr2
+; CHECK-NEXT: xvsrli.h $xr1, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %div = udiv <32 x i16> %shuffle, splat (i16 3)
+ ret <32 x i16> %div
+}
+
+define <16 x i32> @test_i32(<16 x i32> %shuffle) {
+; CHECK-LABEL: test_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a0, -349526
+; CHECK-NEXT: ori $a0, $a0, 2731
+; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0
+; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr2
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvmuh.wu $xr1, $xr1, $xr2
+; CHECK-NEXT: xvsrli.w $xr1, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %div = udiv <16 x i32> %shuffle, splat (i32 3)
+ ret <16 x i32> %div
+}
+
+define <8 x i64> @test_i64(<8 x i64> %shuffle) {
+; LA32-LABEL: test_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvrepli.d $xr2, 3
+; LA32-NEXT: xvdiv.du $xr0, $xr0, $xr2
+; LA32-NEXT: xvdiv.du $xr1, $xr1, $xr2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, -349526
+; LA64-NEXT: ori $a0, $a0, 2731
+; LA64-NEXT: lu32i.d $a0, -349526
+; LA64-NEXT: lu52i.d $a0, $a0, -1366
+; LA64-NEXT: xvreplgr2vr.d $xr2, $a0
+; LA64-NEXT: xvmuh.du $xr0, $xr0, $xr2
+; LA64-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA64-NEXT: xvmuh.du $xr1, $xr1, $xr2
+; LA64-NEXT: xvsrli.d $xr1, $xr1, 1
+; LA64-NEXT: ret
+entry:
+ %div = udiv <8 x i64> %shuffle, splat (i64 3)
+ ret <8 x i64> %div
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
index b0d36a8143fa1..ba84e5c136de3 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
@@ -11,6 +11,7 @@ define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: vslli.b $vr1, $vr0, 4
; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vandi.b $vr0, $vr0, 15
; LA32-NEXT: vor.v $vr0, $vr0, $vr1
; LA32-NEXT: vandi.b $vr1, $vr0, 51
; LA32-NEXT: vslli.b $vr1, $vr1, 2
@@ -116,6 +117,7 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
; LA32-NEXT: vslli.b $vr1, $vr0, 4
; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vandi.b $vr0, $vr0, 15
; LA32-NEXT: vor.v $vr0, $vr0, $vr1
; LA32-NEXT: vandi.b $vr1, $vr0, 51
; LA32-NEXT: vslli.b $vr1, $vr1, 2
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
index 334af22edee59..8e700689fdc58 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
@@ -131,22 +131,13 @@ entry:
}
define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vavg_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT: vsrli.d $vr0, $vr0, 1
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vavg_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vld $vr1, $a2, 0
-; LA64-NEXT: vavg.du $vr0, $vr0, $vr1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -298,8 +289,8 @@ define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vld $vr1, $a2, 0
; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT: vaddi.du $vr0, $vr0, 1
-; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vrepli.d $vr1, 1
+; LA32-NEXT: vavg.du $vr0, $vr0, $vr1
; LA32-NEXT: vst $vr0, $a0, 0
; LA32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll b/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll
new file mode 100644
index 0000000000000..df4da0178f389
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/issue170976.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define <32 x i8> @test_i8(<32 x i8> %shuffle) {
+; CHECK-LABEL: test_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.b $vr2, -85
+; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr2
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vmuh.bu $vr1, $vr1, $vr2
+; CHECK-NEXT: vsrli.b $vr1, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+ %div = udiv <32 x i8> %shuffle, splat (i8 3)
+ ret <32 x i8> %div
+}
+
+define <16 x i16> @test_i16(<16 x i16> %shuffle) {
+; CHECK-LABEL: test_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a0, 10
+; CHECK-NEXT: ori $a0, $a0, 2731
+; CHECK-NEXT: vreplgr2vr.h $vr2, $a0
+; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr2
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vmuh.hu $vr1, $vr1, $vr2
+; CHECK-NEXT: vsrli.h $vr1, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+ %div = udiv <16 x i16> %shuffle, splat (i16 3)
+ ret <16 x i16> %div
+}
+
+define <8 x i32> @test_i32(<8 x i32> %shuffle) {
+; CHECK-LABEL: test_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a0, -349526
+; CHECK-NEXT: ori $a0, $a0, 2731
+; CHECK-NEXT: vreplgr2vr.w $vr2, $a0
+; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr2
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vmuh.wu $vr1, $vr1, $vr2
+; CHECK-NEXT: vsrli.w $vr1, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+ %div = udiv <8 x i32> %shuffle, splat (i32 3)
+ ret <8 x i32> %div
+}
+
+define <4 x i64> @test_i64(<4 x i64> %shuffle) {
+; LA32-LABEL: test_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vrepli.d $vr2, 3
+; LA32-NEXT: vdiv.du $vr0, $vr0, $vr2
+; LA32-NEXT: vdiv.du $vr1, $vr1, $vr2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, -349526
+; LA64-NEXT: ori $a0, $a0, 2731
+; LA64-NEXT: lu32i.d $a0, -349526
+; LA64-NEXT: lu52i.d $a0, $a0, -1366
+; LA64-NEXT: vreplgr2vr.d $vr2, $a0
+; LA64-NEXT: vmuh.du $vr0, $vr0, $vr2
+; LA64-NEXT: vsrli.d $vr0, $vr0, 1
+; LA64-NEXT: vmuh.du $vr1, $vr1, $vr2
+; LA64-NEXT: vsrli.d $vr1, $vr1, 1
+; LA64-NEXT: ret
+entry:
+ %div = udiv <4 x i64> %shuffle, splat (i64 3)
+ ret <4 x i64> %div
+}