[llvm] 9b87ad3 - [LoongArch] Implement OR combination to generate bstrins.w/d
Weining Lu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 14 02:21:31 PDT 2022
Author: Weining Lu
Date: 2022-07-14T17:20:43+08:00
New Revision: 9b87ad33c1fa0ecf09bbdc5cc2384ae081101269
URL: https://github.com/llvm/llvm-project/commit/9b87ad33c1fa0ecf09bbdc5cc2384ae081101269
DIFF: https://github.com/llvm/llvm-project/commit/9b87ad33c1fa0ecf09bbdc5cc2384ae081101269.diff
LOG: [LoongArch] Implement OR combination to generate bstrins.w/d
Differential Revision: https://reviews.llvm.org/D129357
Added:
llvm/test/CodeGen/LoongArch/bstrins_d.ll
llvm/test/CodeGen/LoongArch/bstrins_w.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5c2652114375d..4acf90bd97884 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -102,6 +103,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(FunctionAlignment);
setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::SRL);
}
@@ -502,6 +504,224 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Try to combine an ISD::OR node into LoongArchISD::BSTRINS by matching one of
+// the eight patterns below (in both operand orders); else return SDValue().
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ EVT ValTy = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *CN0, *CN1, *CNMask, *CNShamt;
+ SDLoc DL(N);
+ unsigned ValBits = ValTy.getSizeInBits();
+ unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
+ unsigned MaskIdx, MaskLen, Shamt;
+ bool SwapAndRetried = false;
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (ValBits != 32 && ValBits != 64)
+ return SDValue();
+
+Retry:
+ // 1st pattern to match BSTRINS:
+ // R = or (and X, mask0), (and (shl Y, lsb), mask1)
+ // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
+ // =>
+ // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
+ if (N0.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
+ MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
+ (MaskIdx0 + MaskLen0 <= ValBits)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 2nd pattern to match BSTRINS:
+ // R = or (and X, mask0), (shl (and Y, mask1), lsb)
+ // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
+ // =>
+ // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
+ if (N0.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
+ MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
+ (MaskIdx0 + MaskLen0 <= ValBits)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 3rd pattern to match BSTRINS:
+ // R = or (and X, mask0), (and Y, mask1)
+ // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
+ // =>
+ // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
+ // where msb = lsb + size - 1
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ (MaskIdx0 + MaskLen0 <= 64) &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
+ (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
+ DAG.getConstant(MaskIdx0, DL, GRLenVT)),
+ DAG.getConstant(ValBits == 32
+ ? (MaskIdx0 + (MaskLen0 & 31) - 1)
+ : (MaskIdx0 + MaskLen0 - 1),
+ DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 4th pattern to match BSTRINS:
+ // R = or (and X, mask), (shl Y, shamt)
+ // where mask = (2**shamt - 1)
+ // =>
+ // R = BSTRINS X, Y, ValBits - 1, shamt
+ // where ValBits = 32 or 64
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
+ MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ (Shamt = CN1->getZExtValue()) == MaskLen0 &&
+ (MaskIdx0 + MaskLen0 <= ValBits)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ N1.getOperand(0),
+ DAG.getConstant((ValBits - 1), DL, GRLenVT),
+ DAG.getConstant(Shamt, DL, GRLenVT));
+ }
+
+ // 5th pattern to match BSTRINS:
+ // R = or (and X, mask), const
+ // where ~mask = (2**size - 1) << lsb, mask & const = 0
+ // =>
+ // R = BSTRINS X, (const >> lsb), msb, lsb (where msb = lsb + size - 1)
+ // On i32, ~mask is computed in 64 bits, so MaskLen0 may be size + 32.
+ if (N0.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
+ (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
+ unsigned Msb = MaskIdx0 + (ValBits == 32 ? (MaskLen0 & 31) : MaskLen0) - 1;
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
+ DAG.getConstant(Msb, DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 6th pattern.
+ // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
+ // by the incoming bits are known to be zero.
+ // =>
+ // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
+ //
+ // Note that the 2nd pattern is a special case of the 6th, i.e. the 6th
+ // pattern is more general than the 2nd. So we put the 2nd before the 6th in
+ // order to match as many nodes as possible.
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
+ (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
+ MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ CNShamt->getZExtValue() + MaskLen <= ValBits) {
+ Shamt = CNShamt->getZExtValue();
+ APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
+ if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
+ DAG.getConstant(Shamt, DL, GRLenVT));
+ }
+ }
+
+ // 7th pattern.
+ // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
+ // overwritten by the incoming bits are known to be zero.
+ // =>
+ // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
+ //
+ // Similarly, the 7th pattern is more general than the 1st. So we put the 1st
+ // before the 7th in order to match as many nodes as possible.
+ if (N1.getOpcode() == ISD::AND &&
+ (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
+ N1.getOperand(0).getOpcode() == ISD::SHL &&
+ (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ CNShamt->getZExtValue() == MaskIdx) {
+ APInt ShMask(ValBits, CNMask->getZExtValue());
+ if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
+ DAG.getConstant(MaskIdx, DL, GRLenVT));
+ }
+ }
+
+ // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
+ if (!SwapAndRetried) {
+ std::swap(N0, N1);
+ SwapAndRetried = true;
+ goto Retry;
+ }
+
+ SwapAndRetried = false;
+Retry2:
+ // 8th pattern.
+ // a = b | (c & shifted_mask), where all positions in b to be overwritten by
+ // the incoming bits are known to be zero.
+ // =>
+ // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
+ //
+ // Similarly, the 8th pattern is more general than the 4th and 5th patterns. So
+ // we put it here in order to match as many nodes as possible or generate fewer
+ // instructions.
+ if (N1.getOpcode() == ISD::AND &&
+ (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
+ APInt ShMask(ValBits, CNMask->getZExtValue());
+ if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
+ DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
+ N1->getOperand(0),
+ DAG.getConstant(MaskIdx, DL, GRLenVT)),
+ DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
+ DAG.getConstant(MaskIdx, DL, GRLenVT));
+ }
+ }
+ // Swap N0/N1 and retry.
+ if (!SwapAndRetried) {
+ std::swap(N0, N1);
+ SwapAndRetried = true;
+ goto Retry2;
+ }
+
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -510,6 +730,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR:
+ return performORCombine(N, DAG, DCI, Subtarget);
case ISD::SRL:
return performSRLCombine(N, DAG, DCI, Subtarget);
}
@@ -579,6 +801,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SLL_W)
NODE_NAME_CASE(SRA_W)
NODE_NAME_CASE(SRL_W)
+ NODE_NAME_CASE(BSTRINS)
NODE_NAME_CASE(BSTRPICK)
NODE_NAME_CASE(MOVGR2FR_W_LA64)
NODE_NAME_CASE(MOVFR2GR_S_LA64)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index be58660893eb3..279550482675e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -41,6 +41,7 @@ enum NodeType : unsigned {
FTINT,
+ BSTRINS,
BSTRPICK,
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index e3286f6590ccb..d07d086bd7da8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -26,6 +26,11 @@ def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
]>;
+def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>,
+ SDTCisSameAs<3, 4>
+]>;
+
def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [
SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3>
]>;
@@ -46,6 +51,8 @@ def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_bstrins
+ : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
def loongarch_bstrpick
: SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
@@ -777,15 +784,21 @@ let isBarrier = 1, isReturn = 1, isTerminator = 1 in
def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
PseudoInstExpansion<(JIRL R0, R1, 0)>;
-/// BSTRPICK
+/// BSTRINS and BSTRPICK
-let Predicates = [IsLA32] in
+let Predicates = [IsLA32] in {
+def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
+ (BSTRINS_W GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
(BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
+} // Predicates = [IsLA32]
-let Predicates = [IsLA64] in
+let Predicates = [IsLA64] in {
+def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
+ (BSTRINS_D GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
(BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
+} // Predicates = [IsLA64]
/// Loads
diff --git a/llvm/test/CodeGen/LoongArch/bstrins_d.ll b/llvm/test/CodeGen/LoongArch/bstrins_d.ll
new file mode 100644
index 0000000000000..342e044c7a7be
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bstrins_d.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+;; Test generation of the bstrins.d instruction.
+;; There are 8 patterns that can be matched to bstrins.d. See performORCombine
+;; for details.
+
+;; Pattern 1
+;; R = or (and X, mask0), (and (shl Y, lsb), mask1)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i64 @pat1(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+ %shl = shl i64 %b, 16
+ %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
+ %or = or i64 %and1, %and2
+ ret i64 %or
+}
+
+define i64 @pat1_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat1_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+ %shl = shl i64 %b, 16
+ %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
+ %or = or i64 %and2, %and1
+ ret i64 %or
+}
+
+;; Pattern 2
+;; R = or (and X, mask0), (shl (and Y, mask1), lsb)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i64 @pat2(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+ %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff
+ %shl = shl i64 %and2, 16
+ %or = or i64 %and1, %shl
+ ret i64 %or
+}
+
+define i64 @pat2_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat2_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+ %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff
+ %shl = shl i64 %and2, 16
+ %or = or i64 %shl, %and1
+ ret i64 %or
+}
+
+;; Pattern 3
+;; R = or (and X, mask0), (and Y, mask1)
+;; =>
+;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb
+define i64 @pat3(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi $a1, $a1, 288
+; CHECK-NEXT: srli.d $a1, $a1, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f
+ %and2 = and i64 %b, 288 ; 0x0000000000000120
+ %or = or i64 %and1, %and2
+ ret i64 %or
+}
+
+define i64 @pat3_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat3_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi $a1, $a1, 288
+; CHECK-NEXT: srli.d $a1, $a1, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f
+ %and2 = and i64 %b, 288 ; 0x0000000000000120
+ %or = or i64 %and2, %and1
+ ret i64 %or
+}
+
+;; Pattern 4
+;; R = or (and X, mask), (shl Y, shamt)
+;; =>
+;; R = BSTRINS X, Y, 63, shamt
+define i64 @pat4(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i64 %a, 255
+ %shl = shl i64 %b, 8
+ %or = or i64 %and, %shl
+ ret i64 %or
+}
+
+define i64 @pat4_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat4_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i64 %a, 255
+ %shl = shl i64 %b, 8
+ %or = or i64 %shl, %and
+ ret i64 %or
+}
+
+;; Pattern 5
+;; R = or (and X, mask0), const
+;; =>
+;; R = BSTRINS X, (const >> lsb), msb, lsb
+define i64 @pat5(i64 %a) nounwind {
+; CHECK-LABEL: pat5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 74565
+; CHECK-NEXT: ori $a1, $a1, 1656
+; CHECK-NEXT: bstrins.d $a0, $a1, 47, 16
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff
+ %or = or i64 %and, 20015998304256 ; 0x0000123456780000
+ ret i64 %or
+}
+
+;; Pattern 6: a = b | ((c & mask) << shamt)
+;; In this testcase b is 0x123456000000789a, but in fact we do not require b
+;; being a constant. As long as all positions in b to be overwritten by the
+;; incoming bits are known to be zero, the pattern could be matched.
+define i64 @pat6(i64 %c) nounwind {
+; CHECK-LABEL: pat6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 7
+; CHECK-NEXT: ori $a1, $a1, 2202
+; CHECK-NEXT: lu32i.d $a1, 284160
+; CHECK-NEXT: lu52i.d $a1, $a1, 291
+; CHECK-NEXT: bstrins.d $a1, $a0, 39, 16
+; CHECK-NEXT: move $a0, $a1
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i64 %c, 16777215 ; 0x0000000000ffffff
+ %shl = shl i64 %and, 16
+ %or = or i64 %shl, 1311767949471676570 ; 0x123456000000789a
+ ret i64 %or
+}
+
+;; Pattern 7: a = b | ((c << shamt) & shifted_mask)
+;; Similar to pattern 6.
+define i64 @pat7(i64 %c) nounwind {
+; CHECK-LABEL: pat7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 7
+; CHECK-NEXT: ori $a1, $a1, 2202
+; CHECK-NEXT: lu32i.d $a1, 284160
+; CHECK-NEXT: lu52i.d $a1, $a1, 291
+; CHECK-NEXT: bstrins.d $a1, $a0, 39, 16
+; CHECK-NEXT: move $a0, $a1
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %shl = shl i64 %c, 16
+ %and = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
+ %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a
+ ret i64 %or
+}
+
+;; Pattern 8: a = b | (c & shifted_mask)
+;; Similar to pattern 7 but without shift to c.
+define i64 @pat8(i64 %c) nounwind {
+; CHECK-LABEL: pat8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli.d $a1, $a0, 16
+; CHECK-NEXT: lu12i.w $a0, 7
+; CHECK-NEXT: ori $a0, $a0, 2202
+; CHECK-NEXT: lu32i.d $a0, 284160
+; CHECK-NEXT: lu52i.d $a0, $a0, 291
+; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i64 %c, 1099511562240 ; 0x000000ffffff0000
+ %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a
+ ret i64 %or
+}
+
+;; Test that bstrins.d is not generated because constant OR operand
+;; doesn't fit into bits cleared by constant AND operand.
+define i64 @no_bstrins_d(i64 %a) nounwind {
+; CHECK-LABEL: no_bstrins_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 354185
+; CHECK-NEXT: lu32i.d $a1, 4660
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: lu12i.w $a1, 354191
+; CHECK-NEXT: ori $a1, $a1, 4095
+; CHECK-NEXT: lu32i.d $a1, -60876
+; CHECK-NEXT: and $a0, $a0, $a1
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff
+ %or = or i64 %and, 20015998341120 ; 0x0000123456789000
+ ret i64 %or
+}
diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
new file mode 100644
index 0000000000000..47c4d826c2ee5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s
+
+;; Test generation of the bstrins.w instruction.
+;; There are 8 patterns that can be matched to bstrins.w. See performORCombine
+;; for details.
+
+;; Pattern 1
+;; R = or (and X, mask0), (and (shl Y, lsb), mask1)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i32 @pat1(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i32 %a, -1048321 ; 0xfff000ff
+ %shl = shl i32 %b, 8
+ %and2 = and i32 %shl, 1048320 ; 0x000fff00
+ %or = or i32 %and1, %and2
+ ret i32 %or
+}
+
+define i32 @pat1_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat1_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i32 %a, -1048321 ; 0xfff000ff
+ %shl = shl i32 %b, 8
+ %and2 = and i32 %shl, 1048320 ; 0x000fff00
+ %or = or i32 %and2, %and1
+ ret i32 %or
+}
+
+;; Pattern 2
+;; R = or (and X, mask0), (shl (and Y, mask1), lsb)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i32 @pat2(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i32 %a, -1048321 ; 0xfff000ff
+ %and2 = and i32 %b, 4095 ; 0x00000fff
+ %shl = shl i32 %and2, 8
+ %or = or i32 %and1, %shl
+ ret i32 %or
+}
+
+define i32 @pat2_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat2_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i32 %a, -1048321 ; 0xfff000ff
+ %and2 = and i32 %b, 4095 ; 0x00000fff
+ %shl = shl i32 %and2, 8
+ %or = or i32 %shl, %and1
+ ret i32 %or
+}
+
+;; Pattern 3
+;; R = or (and X, mask0), (and Y, mask1)
+;; =>
+;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb
+define i32 @pat3(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi $a1, $a1, 288
+; CHECK-NEXT: srli.w $a1, $a1, 4
+; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i32 %a, -4081 ; 0xfffff00f
+ %and2 = and i32 %b, 288 ; 0x00000120
+ %or = or i32 %and1, %and2
+ ret i32 %or
+}
+
+define i32 @pat3_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat3_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi $a1, $a1, 288
+; CHECK-NEXT: srli.w $a1, $a1, 4
+; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i32 %a, -4081 ; 0xfffff00f
+ %and2 = and i32 %b, 288 ; 0x00000120
+ %or = or i32 %and2, %and1
+ ret i32 %or
+}
+
+define i32 @pat3_positive_mask0(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat3_positive_mask0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli.w $a1, $a1, 28
+; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and1 = and i32 %a, 268435455 ; 0x0fffffff
+ %and2 = and i32 %b, 4026531840 ; 0xf0000000
+ %or = or i32 %and1, %and2
+ ret i32 %or
+}
+
+;; Pattern 4
+;; R = or (and X, mask), (shl Y, shamt)
+;; =>
+;; R = BSTRINS X, Y, 31, shamt
+define i32 @pat4(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i32 %a, 268435455 ; 0x0fffffff
+ %shl = shl i32 %b, 28
+ %or = or i32 %and, %shl
+ ret i32 %or
+}
+
+define i32 @pat4_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat4_swap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i32 %a, 268435455 ; 0x0fffffff
+ %shl = shl i32 %b, 28
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+;; Pattern 5
+;; R = or (and X, mask), const
+;; =>
+;; R = BSTRINS X, (const >> lsb), msb, lsb
+define i32 @pat5(i32 %a) nounwind {
+; CHECK-LABEL: pat5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 1
+; CHECK-NEXT: ori $a1, $a1, 564
+; CHECK-NEXT: bstrins.w $a0, $a1, 23, 8
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i32 %a, 4278190335 ; 0xff0000ff
+ %or = or i32 %and, 1192960 ; 0x00123400
+ ret i32 %or
+}
+
+;; Pattern 6: a = b | ((c & mask) << shamt)
+;; In this testcase b is 0x10000002, but in fact we do not require b being a
+;; constant. As long as all positions in b to be overwritten by the incoming
+;; bits are known to be zero, the pattern could be matched.
+define i32 @pat6(i32 %c) nounwind {
+; CHECK-LABEL: pat6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 65536
+; CHECK-NEXT: ori $a1, $a1, 2
+; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4
+; CHECK-NEXT: move $a0, $a1
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i32 %c, 16777215 ; 0x00ffffff
+ %shl = shl i32 %and, 4
+ %or = or i32 %shl, 268435458 ; 0x10000002
+ ret i32 %or
+}
+
+;; Pattern 7: a = b | ((c << shamt) & shifted_mask)
+;; Similar to pattern 6.
+define i32 @pat7(i32 %c) nounwind {
+; CHECK-LABEL: pat7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 65536
+; CHECK-NEXT: ori $a1, $a1, 2
+; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4
+; CHECK-NEXT: move $a0, $a1
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %shl = shl i32 %c, 4
+ %and = and i32 %shl, 268435440 ; 0x0ffffff0
+ %or = or i32 %and, 268435458 ; 0x10000002
+ ret i32 %or
+}
+
+;; Pattern 8: a = b | (c & shifted_mask)
+;; Similar to pattern 7 but without shift to c.
+define i32 @pat8(i32 %c) nounwind {
+; CHECK-LABEL: pat8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli.w $a1, $a0, 4
+; CHECK-NEXT: lu12i.w $a0, 65536
+; CHECK-NEXT: ori $a0, $a0, 2
+; CHECK-NEXT: bstrins.w $a0, $a1, 27, 4
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i32 %c, 268435440 ; 0x0ffffff0
+ %or = or i32 %and, 268435458 ; 0x10000002
+ ret i32 %or
+}
+
+;; Test that bstrins.w is not generated because constant OR operand
+;; doesn't fit into bits cleared by constant AND operand.
+define i32 @no_bstrins_w(i32 %a) nounwind {
+; CHECK-LABEL: no_bstrins_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a1, 291
+; CHECK-NEXT: ori $a1, $a1, 1104
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: lu12i.w $a1, -3805
+; CHECK-NEXT: ori $a1, $a1, 1279
+; CHECK-NEXT: and $a0, $a0, $a1
+; CHECK-NEXT: jirl $zero, $ra, 0
+ %and = and i32 %a, 4278190335 ; 0xff0000ff
+ %or = or i32 %and, 1193040 ; 0x00123450
+ ret i32 %or
+}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
index 968a701660c06..33f6dbee748ef 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
@@ -250,9 +250,8 @@ define double @convert_u32_to_double(i32 %a) nounwind {
; LA64-NEXT: addi.d $a1, $a1, .LCPI12_0
; LA64-NEXT: fld.d $fa1, $a1, 0
; LA64-NEXT: fsub.d $fa0, $fa0, $fa1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: lu52i.d $a1, $zero, 1075
-; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 275200
+; LA64-NEXT: bstrins.d $a0, $a1, 63, 32
; LA64-NEXT: movgr2fr.d $fa1, $a0
; LA64-NEXT: fadd.d $fa0, $fa1, $fa0
; LA64-NEXT: jirl $zero, $ra, 0
@@ -280,9 +279,8 @@ define double @convert_u64_to_double(i64 %a) nounwind {
; LA64-NEXT: addi.d $a1, $a1, .LCPI13_0
; LA64-NEXT: fld.d $fa1, $a1, 0
; LA64-NEXT: fsub.d $fa0, $fa0, $fa1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: lu52i.d $a1, $zero, 1075
-; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 275200
+; LA64-NEXT: bstrins.d $a0, $a1, 63, 32
; LA64-NEXT: movgr2fr.d $fa1, $a0
; LA64-NEXT: fadd.d $fa0, $fa1, $fa0
; LA64-NEXT: jirl $zero, $ra, 0
More information about the llvm-commits
mailing list