[llvm-branch-commits] [llvm] [LoongArch][DAGCombiner] Combine vand (vnot ..) to vandn (PR #161037)
Zhaoxin Yang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Sep 29 23:33:14 PDT 2025
ylzsx (https://github.com/ylzsx) updated https://github.com/llvm/llvm-project/pull/161037
From 040e64772f8b48024f3390e5e402190c501d9302 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Thu, 25 Sep 2025 16:42:24 +0800
Subject: [PATCH 1/4] [LoongArch][DAGCombiner] Combine vand (vnot ..) to vandn
After this commit, DAGCombiner will have more opportunities to perform
vector folding: (and (xor X, -1), Y) is now combined into the new
LoongArchISD::VANDN node. This patch also adds the following VANDN
foldings (an illustrative IR sketch follows the list):
- VANDN(x, NOT(y)) -> AND(NOT(x), NOT(y)) -> NOT(OR(x, y))
- VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm))) -> NOT(OR(x, SplatVector(~Imm)))
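A minimal IR sketch of the second folding (not part of this patch; the
function name and body are assumptions, though the -4/3 constants match the
and_not_combine_splatimm_v16i8 test updated in PATCH 2/4):

define void @andn_splatimm_sketch(ptr %res, ptr %a) nounwind {
  %v0  = load <16 x i8>, ptr %a
  %not = xor <16 x i8> %v0, splat (i8 -1)   ; NOT(x)
  %and = and <16 x i8> %not, splat (i8 -4)  ; AND(NOT(x), SplatVector(-4))
  store <16 x i8> %and, ptr %res
  ret void
}

With LSX enabled (e.g. -mattr=+lsx), this used to select vrepli.b $vr1, -4
followed by vandn.v; after this patch it becomes a single
vnori.b $vr0, $vr0, 3, since AND(NOT(x), -4) == NOT(OR(x, 3)) and
~(-4) == 3 for i8.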
---
.../LoongArch/LoongArchISelLowering.cpp | 155 ++++++++++++++++++
.../Target/LoongArch/LoongArchISelLowering.h | 3 +
.../LoongArch/LoongArchLASXInstrInfo.td | 26 +--
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 27 +--
4 files changed, 185 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 94f53d5b85f10..30d4bac25da78 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4939,6 +4939,96 @@ void LoongArchTargetLowering::ReplaceNodeResults(
}
}
+// Check if all elements in build_vector are the same or undef, and if so,
+// return true and set the splat element in SplatValue.
+static bool isSplatOrUndef(SDNode *N, SDValue &SplatValue) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ for (SDValue Op : N->ops()) {
+ if (!Op.isUndef() && SplatValue && Op != SplatValue)
+ return false;
+ if (!Op.isUndef())
+ SplatValue = Op;
+ }
+ return true;
+}
+
+// Helper to attempt to return a cheaper, bit-inverted version of \p V.
+static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
+ // TODO: don't always ignore oneuse constraints.
+ V = peekThroughBitcasts(V);
+ EVT VT = V.getValueType();
+
+ // Match not(xor X, -1) -> X.
+ if (V.getOpcode() == ISD::XOR &&
+ (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
+ isAllOnesConstant(V.getOperand(1))))
+ return V.getOperand(0);
+
+ // Match not(extract_subvector(not(X))) -> extract_subvector(X).
+ if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+ if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
+ V.getOperand(1));
+ }
+ }
+
+ // Match not(SplatVector(not(X))) -> SplatVector(X).
+ SDValue SplatValue;
+ if (isSplatOrUndef(V.getNode(), SplatValue) &&
+ V->isOnlyUserOf(SplatValue.getNode())) {
+ if (SDValue Not = isNOT(SplatValue, DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getSplat(VT, SDLoc(Not), Not);
+ }
+ }
+
+ // Match not(or(not(X),not(Y))) -> and(X, Y).
+ if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
+ // TODO: Handle cases with single NOT operand -> VANDN
+ if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
+ if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
+ return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
+ DAG.getBitcast(VT, Op1));
+ }
+
+ // TODO: Add more matching patterns. Such as,
+ // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
+ // not(slt(C, X)) -> slt(X - 1, C)
+
+ return SDValue();
+}
+
+/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
+static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
+
+ MVT VT = N->getSimpleValueType(0);
+ if (!VT.is128BitVector() && !VT.is256BitVector())
+ return SDValue();
+
+ SDValue X, Y;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (SDValue Not = isNOT(N0, DAG)) {
+ X = Not;
+ Y = N1;
+ } else if (SDValue Not = isNOT(N1, DAG)) {
+ X = Not;
+ Y = N0;
+ } else
+ return SDValue();
+
+ X = DAG.getBitcast(VT, X);
+ Y = DAG.getBitcast(VT, Y);
+ return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
+}
+
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
@@ -4960,6 +5050,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.has32S())
return SDValue();
+ if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
+ return R;
+
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
!isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
@@ -6628,6 +6721,65 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
+static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ MVT VT = N->getSimpleValueType(0);
+ SDLoc DL(N);
+
+ // VANDN(undef, x) -> 0
+ // VANDN(x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // VANDN(0, x) -> x
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+
+ // VANDN(x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
+
+ // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return DAG.getNOT(DL, N0, VT);
+
+ // Turn VANDN back to AND if input is inverted.
+ if (SDValue Not = isNOT(N0, DAG))
+ return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
+
+ // Folds for better commutativity:
+ if (N1->hasOneUse()) {
+ // VANDN(x, NOT(y)) -> AND(NOT(x), NOT(y)) -> NOT(OR(x, y)).
+ if (SDValue Not = isNOT(N1, DAG))
+ return DAG.getNOT(
+ DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
+
+ // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
+ // -> NOT(OR(x, SplatVector(~Imm)))
+ // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
+ // gain benefits.
+ if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8)) {
+ SDValue SplatValue;
+ if (isSplatOrUndef(N1.getNode(), SplatValue) &&
+ N1->isOnlyUserOf(SplatValue.getNode()))
+ if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+ uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
+ SDValue Not =
+ DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
+ return DAG.getNOT(
+ DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
+ VT);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6663,6 +6815,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+ case LoongArchISD::VANDN:
+ return performVANDNCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
@@ -7454,6 +7608,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VPICK_SEXT_ELT)
NODE_NAME_CASE(VPICK_ZEXT_ELT)
NODE_NAME_CASE(VREPLVE)
+ NODE_NAME_CASE(VANDN)
NODE_NAME_CASE(VALL_ZERO)
NODE_NAME_CASE(VANY_ZERO)
NODE_NAME_CASE(VALL_NONZERO)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 3c00296116ac2..ed4f618983014 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -174,6 +174,9 @@ enum NodeType : unsigned {
VBSLL,
VBSRL,
+ // Vector bit operation
+ VANDN,
+
// Scalar load broadcast to vector
VLDREPL,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..b7f5993103286 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1395,7 +1395,7 @@ def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))),
(XVNOR_V LASX256:$xj, LASX256:$xk)>;
// XVANDN_V
foreach vt = [v32i8, v16i16, v8i32, v4i64] in
-def : Pat<(and (vt (vnot LASX256:$xj)), (vt LASX256:$xk)),
+def : Pat<(loongarch_vandn (vt LASX256:$xj), (vt LASX256:$xk)),
(XVANDN_V LASX256:$xj, LASX256:$xk)>;
// XVORN_V
foreach vt = [v32i8, v16i16, v8i32, v4i64] in
@@ -1449,25 +1449,25 @@ defm : PatXr<ctlz, "XVCLZ">;
defm : PatXr<ctpop, "XVPCNT">;
// XVBITCLR_{B/H/W/D}
-def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))),
+def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1, v32i8:$xk)), v32i8:$xj),
(v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
-def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))),
+def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1, v16i16:$xk)), v16i16:$xj),
(v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
-def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))),
+def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1, v8i32:$xk)), v8i32:$xj),
(v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
-def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))),
+def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1, v4i64:$xk)), v4i64:$xj),
(v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
-def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati8imm7 v32i8:$xk)))),
+def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v32i8:$xk))), v32i8:$xj),
(v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
-def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati16imm15 v16i16:$xk)))),
+def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v16i16:$xk))), v16i16:$xj),
(v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
-def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati32imm31 v8i32:$xk)))),
+def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v8i32:$xk))), v8i32:$xj),
(v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
-def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati64imm63 v4i64:$xk)))),
+def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1,
+ (vsplati64imm63 v4i64:$xk))), v4i64:$xj),
(v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
// XVBITCLRI_{B/H/W/D}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 2c36099f8eb71..c5ce7b4e02678 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -56,6 +56,7 @@ def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
+def loongarch_vandn: SDNode<"LoongArchISD::VANDN", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV2RUimm>;
@@ -1586,7 +1587,7 @@ def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))),
(VNOR_V LSX128:$vj, LSX128:$vk)>;
// VANDN_V
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
-def : Pat<(and (vt (vnot LSX128:$vj)), (vt LSX128:$vk)),
+def : Pat<(loongarch_vandn (vt LSX128:$vj), (vt LSX128:$vk)),
(VANDN_V LSX128:$vj, LSX128:$vk)>;
// VORN_V
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
@@ -1640,25 +1641,25 @@ defm : PatVr<ctlz, "VCLZ">;
defm : PatVr<ctpop, "VPCNT">;
// VBITCLR_{B/H/W/D}
-def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))),
+def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj),
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))),
+def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj),
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))),
+def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj),
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))),
+def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj),
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
-def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati8imm7 v16i8:$vk)))),
+def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v16i8:$vk))), v16i8:$vj),
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati16imm15 v8i16:$vk)))),
+def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v8i16:$vk))), v8i16:$vj),
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati32imm31 v4i32:$vk)))),
+def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v4i32:$vk))), v4i32:$vj),
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati64imm63 v2i64:$vk)))),
+def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1,
+ (vsplati64imm63 v2i64:$vk))), v2i64:$vj),
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
// VBITCLRI_{B/H/W/D}
From 86c85df1a5367f282153b5e02dcf7678abc59a2c Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Sun, 28 Sep 2025 11:17:28 +0800
Subject: [PATCH 2/4] Modify tests
---
.../CodeGen/LoongArch/lasx/and-not-combine.ll | 42 +++++++------------
.../CodeGen/LoongArch/lsx/and-not-combine.ll | 42 +++++++------------
2 files changed, 28 insertions(+), 56 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
index aa67a20ab08a7..ea20d0c71fe2b 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
@@ -90,9 +90,8 @@ define void @pre_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind {
; CHECK-LABEL: pre_not_and_not_combine_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: nor $a1, $a2, $zero
-; CHECK-NEXT: xvreplgr2vr.b $xr1, $a1
-; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <32 x i8>, ptr %a
@@ -110,8 +109,7 @@ define void @post_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
-; CHECK-NEXT: xvxori.b $xr1, $xr1, 255
-; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <32 x i8>, ptr %a
@@ -128,9 +126,8 @@ define void @pre_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind {
; CHECK-LABEL: pre_not_and_not_combine_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: nor $a1, $a2, $zero
-; CHECK-NEXT: xvreplgr2vr.h $xr1, $a1
-; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i16>, ptr %a
@@ -148,9 +145,7 @@ define void @post_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr1, $xr1, $xr2
-; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i16>, ptr %a
@@ -167,9 +162,8 @@ define void @pre_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind {
; CHECK-LABEL: pre_not_and_not_combine_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: nor $a1, $a2, $zero
-; CHECK-NEXT: xvreplgr2vr.w $xr1, $a1
-; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i32>, ptr %a
@@ -187,9 +181,7 @@ define void @post_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr1, $xr1, $xr2
-; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i32>, ptr %a
@@ -218,9 +210,8 @@ define void @pre_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind {
; LA64-LABEL: pre_not_and_not_combine_v4i64:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: nor $a1, $a2, $zero
-; LA64-NEXT: xvreplgr2vr.d $xr1, $a1
-; LA64-NEXT: xvandn.v $xr0, $xr0, $xr1
+; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT: xvnor.v $xr0, $xr0, $xr1
; LA64-NEXT: xvst $xr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <4 x i64>, ptr %a
@@ -240,9 +231,7 @@ define void @post_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind {
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
; LA32-NEXT: xvreplve0.d $xr1, $xr1
-; LA32-NEXT: xvrepli.b $xr2, -1
-; LA32-NEXT: xvxor.v $xr1, $xr1, $xr2
-; LA32-NEXT: xvandn.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvnor.v $xr0, $xr0, $xr1
; LA32-NEXT: xvst $xr0, $a0, 0
; LA32-NEXT: ret
;
@@ -250,9 +239,7 @@ define void @post_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a1, 0
; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
-; LA64-NEXT: xvrepli.b $xr2, -1
-; LA64-NEXT: xvxor.v $xr1, $xr1, $xr2
-; LA64-NEXT: xvandn.v $xr0, $xr0, $xr1
+; LA64-NEXT: xvnor.v $xr0, $xr0, $xr1
; LA64-NEXT: xvst $xr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <4 x i64>, ptr %a
@@ -269,8 +256,7 @@ define void @and_not_combine_splatimm_v32i8(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: and_not_combine_splatimm_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvrepli.b $xr1, -4
-; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnori.b $xr0, $xr0, 3
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <32 x i8>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
index 960d8c4b156b5..393f0dafcac7e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
@@ -90,9 +90,8 @@ define void @pre_not_and_not_combine_v16i8(ptr %res, ptr %a, i8 %b) nounwind {
; CHECK-LABEL: pre_not_and_not_combine_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: nor $a1, $a2, $zero
-; CHECK-NEXT: vreplgr2vr.b $vr1, $a1
-; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %a
@@ -110,8 +109,7 @@ define void @post_not_and_not_combine_v16i8(ptr %res, ptr %a, i8 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
-; CHECK-NEXT: vxori.b $vr1, $vr1, 255
-; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %a
@@ -128,9 +126,8 @@ define void @pre_not_and_not_combine_v8i16(ptr %res, ptr %a, i16 %b) nounwind {
; CHECK-LABEL: pre_not_and_not_combine_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: nor $a1, $a2, $zero
-; CHECK-NEXT: vreplgr2vr.h $vr1, $a1
-; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i16>, ptr %a
@@ -148,9 +145,7 @@ define void @post_not_and_not_combine_v8i16(ptr %res, ptr %a, i16 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
-; CHECK-NEXT: vrepli.b $vr2, -1
-; CHECK-NEXT: vxor.v $vr1, $vr1, $vr2
-; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i16>, ptr %a
@@ -167,9 +162,8 @@ define void @pre_not_and_not_combine_v4i32(ptr %res, ptr %a, i32 %b) nounwind {
; CHECK-LABEL: pre_not_and_not_combine_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: nor $a1, $a2, $zero
-; CHECK-NEXT: vreplgr2vr.w $vr1, $a1
-; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i32>, ptr %a
@@ -187,9 +181,7 @@ define void @post_not_and_not_combine_v4i32(ptr %res, ptr %a, i32 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
-; CHECK-NEXT: vrepli.b $vr2, -1
-; CHECK-NEXT: vxor.v $vr1, $vr1, $vr2
-; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i32>, ptr %a
@@ -218,9 +210,8 @@ define void @pre_not_and_not_combine_v2i64(ptr %res, ptr %a, i64 %b) nounwind {
; LA64-LABEL: pre_not_and_not_combine_v2i64:
; LA64: # %bb.0:
; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: nor $a1, $a2, $zero
-; LA64-NEXT: vreplgr2vr.d $vr1, $a1
-; LA64-NEXT: vandn.v $vr0, $vr0, $vr1
+; LA64-NEXT: vreplgr2vr.d $vr1, $a2
+; LA64-NEXT: vnor.v $vr0, $vr0, $vr1
; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <2 x i64>, ptr %a
@@ -240,9 +231,7 @@ define void @post_not_and_not_combine_v2i64(ptr %res, ptr %a, i64 %b) nounwind {
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
; LA32-NEXT: vreplvei.d $vr1, $vr1, 0
-; LA32-NEXT: vrepli.b $vr2, -1
-; LA32-NEXT: vxor.v $vr1, $vr1, $vr2
-; LA32-NEXT: vandn.v $vr0, $vr0, $vr1
+; LA32-NEXT: vnor.v $vr0, $vr0, $vr1
; LA32-NEXT: vst $vr0, $a0, 0
; LA32-NEXT: ret
;
@@ -250,9 +239,7 @@ define void @post_not_and_not_combine_v2i64(ptr %res, ptr %a, i64 %b) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: vld $vr0, $a1, 0
; LA64-NEXT: vreplgr2vr.d $vr1, $a2
-; LA64-NEXT: vrepli.b $vr2, -1
-; LA64-NEXT: vxor.v $vr1, $vr1, $vr2
-; LA64-NEXT: vandn.v $vr0, $vr0, $vr1
+; LA64-NEXT: vnor.v $vr0, $vr0, $vr1
; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <2 x i64>, ptr %a
@@ -269,8 +256,7 @@ define void @and_not_combine_splatimm_v16i8(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: and_not_combine_splatimm_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vrepli.b $vr1, -4
-; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnori.b $vr0, $vr0, 3
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %a0
From 6eb6a0136a77b981acc98e7f36e288cfc0980f7f Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Sun, 28 Sep 2025 21:28:34 +0800
Subject: [PATCH 3/4] Use getSplatValue instead of isSplatOrUndef
---
.../LoongArch/LoongArchISelLowering.cpp | 42 ++++++++-----------
1 file changed, 18 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 30d4bac25da78..35206587ee20b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4939,20 +4939,6 @@ void LoongArchTargetLowering::ReplaceNodeResults(
}
}
-// Check if all elements in build_vector are the same or undef, and if so,
-// return true and set the splat element in SplatValue.
-static bool isSplatOrUndef(SDNode *N, SDValue &SplatValue) {
- if (N->getOpcode() != ISD::BUILD_VECTOR)
- return false;
- for (SDValue Op : N->ops()) {
- if (!Op.isUndef() && SplatValue && Op != SplatValue)
- return false;
- if (!Op.isUndef())
- SplatValue = Op;
- }
- return true;
-}
-
// Helper to attempt to return a cheaper, bit-inverted version of \p V.
static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
// TODO: don't always ignore oneuse constraints.
@@ -4976,12 +4962,16 @@ static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
}
// Match not(SplatVector(not(X))) -> SplatVector(X).
- SDValue SplatValue;
- if (isSplatOrUndef(V.getNode(), SplatValue) &&
- V->isOnlyUserOf(SplatValue.getNode())) {
- if (SDValue Not = isNOT(SplatValue, DAG)) {
- Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
- return DAG.getSplat(VT, SDLoc(Not), Not);
+ if (V.getOpcode() == ISD::BUILD_VECTOR) {
+ if (SDValue SplatValue =
+ cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
+ if (!V->isOnlyUserOf(SplatValue.getNode()))
+ return SDValue();
+
+ if (SDValue Not = isNOT(SplatValue, DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getSplat(VT, SDLoc(Not), Not);
+ }
}
}
@@ -6762,10 +6752,13 @@ static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
// -> NOT(OR(x, SplatVector(~Imm)))
// Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
// gain benefits.
- if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8)) {
- SDValue SplatValue;
- if (isSplatOrUndef(N1.getNode(), SplatValue) &&
- N1->isOnlyUserOf(SplatValue.getNode()))
+ if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
+ N1.getOpcode() == ISD::BUILD_VECTOR) {
+ if (SDValue SplatValue =
+ cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
+ if (!N1->isOnlyUserOf(SplatValue.getNode()))
+ return SDValue();
+
if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
SDValue Not =
@@ -6774,6 +6767,7 @@ static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
VT);
}
+ }
}
}
From fd81493e90331fe4753da1d3a65d5757e9cd3390 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Tue, 30 Sep 2025 14:02:15 +0800
Subject: [PATCH 4/4] Fix according to heiher's reviews.
---
.../LoongArch/LoongArchISelLowering.cpp | 6 +--
.../CodeGen/LoongArch/lasx/and-not-combine.ll | 38 ++++++-------------
.../CodeGen/LoongArch/lsx/and-not-combine.ll | 23 ++++-------
3 files changed, 23 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 35206587ee20b..049382d8ebbfe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5036,13 +5036,13 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
SDValue NewOperand;
MVT GRLenVT = Subtarget.getGRLenVT();
+ if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
+ return R;
+
// BSTRPICK requires the 32S feature.
if (!Subtarget.has32S())
return SDValue();
- if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
- return R;
-
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
!isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
index ea20d0c71fe2b..5ed49d959bf33 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
@@ -318,10 +318,9 @@ define void @and_or_not_combine_v32i8(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounw
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvseq.b $xr1, $xr1, $xr2
-; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvandi.b $xr0, $xr0, 4
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnori.b $xr0, $xr0, 251
; CHECK-NEXT: xvst $xr0, $a3, 0
; CHECK-NEXT: ret
%a = load <32 x i8>, ptr %pa
@@ -343,12 +342,10 @@ define void @and_or_not_combine_v16i16(ptr %pa, ptr %pb, ptr %pv, ptr %dst) noun
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0
-; CHECK-NEXT: xvrepli.b $xr3, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
; CHECK-NEXT: xvseq.h $xr1, $xr1, $xr2
-; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvrepli.h $xr1, 4
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrepli.h $xr1, 4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a3, 0
; CHECK-NEXT: ret
%a = load <16 x i16>, ptr %pa
@@ -370,12 +367,10 @@ define void @and_or_not_combine_v8i32(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounw
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0
-; CHECK-NEXT: xvrepli.b $xr3, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
; CHECK-NEXT: xvseq.w $xr1, $xr1, $xr2
-; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvrepli.w $xr1, 4
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrepli.w $xr1, 4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a3, 0
; CHECK-NEXT: ret
%a = load <8 x i32>, ptr %pa
@@ -397,12 +392,10 @@ define void @and_or_not_combine_v4i64(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounw
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvrepli.b $xr3, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
; CHECK-NEXT: xvseq.d $xr1, $xr1, $xr2
-; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvrepli.d $xr1, 4
; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrepli.d $xr1, 4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a3, 0
; CHECK-NEXT: ret
%a = load <4 x i64>, ptr %pa
@@ -421,9 +414,8 @@ define void @and_extract_subvector_not_combine_v32i8(ptr %pa, ptr %dst) nounwind
; CHECK-LABEL: and_extract_subvector_not_combine_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
-; CHECK-NEXT: vandi.b $vr0, $vr0, 4
+; CHECK-NEXT: vnori.b $vr0, $vr0, 251
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%a = load volatile <32 x i8>, ptr %pa
@@ -440,11 +432,9 @@ define void @and_extract_subvector_not_combine_v16i16(ptr %pa, ptr %dst) nounwin
; CHECK-LABEL: and_extract_subvector_not_combine_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
; CHECK-NEXT: vrepli.h $vr1, 4
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%a = load volatile <16 x i16>, ptr %pa
@@ -460,11 +450,9 @@ define void @and_extract_subvector_not_combine_v8i32(ptr %pa, ptr %dst) nounwind
; CHECK-LABEL: and_extract_subvector_not_combine_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
; CHECK-NEXT: vrepli.w $vr1, 4
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%a = load volatile <8 x i32>, ptr %pa
@@ -479,11 +467,9 @@ define void @and_extract_subvector_not_combine_v4i64(ptr %pa, ptr %dst) nounwind
; CHECK-LABEL: and_extract_subvector_not_combine_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
; CHECK-NEXT: vrepli.d $vr1, 4
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%a = load volatile <4 x i64>, ptr %pa
diff --git a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
index 393f0dafcac7e..f439a33230596 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
@@ -318,10 +318,9 @@ define void @and_or_not_combine_v16i8(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounw
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vld $vr2, $a1, 0
; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vseq.b $vr1, $vr1, $vr2
-; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vandi.b $vr0, $vr0, 4
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnori.b $vr0, $vr0, 251
; CHECK-NEXT: vst $vr0, $a3, 0
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %pa
@@ -343,12 +342,10 @@ define void @and_or_not_combine_v8i16(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounw
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vld $vr2, $a1, 0
; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vrepli.b $vr3, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
; CHECK-NEXT: vseq.h $vr1, $vr1, $vr2
-; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vrepli.h $vr1, 4
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.h $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a3, 0
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %pa
@@ -370,12 +367,10 @@ define void @and_or_not_combine_v4i32(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounw
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vld $vr2, $a1, 0
; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vrepli.b $vr3, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
; CHECK-NEXT: vseq.w $vr1, $vr1, $vr2
-; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vrepli.w $vr1, 4
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.w $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a3, 0
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %pa
@@ -397,12 +392,10 @@ define void @and_or_not_combine_v2i64(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounw
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vld $vr2, $a1, 0
; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0
-; CHECK-NEXT: vrepli.b $vr3, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
; CHECK-NEXT: vseq.d $vr1, $vr1, $vr2
-; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vrepli.d $vr1, 4
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.d $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a3, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %pa