[llvm-branch-commits] [LoongArch] Optimize for immediate value materialization using BSTRINS_D instruction (PR #106332)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 27 20:25:10 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
@llvm/pr-subscribers-mc
Author: wanglei (wangleiat)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/106332.diff
10 Files Affected:
- (modified) llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp (+24-6)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp (+19-3)
- (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp (+8)
- (modified) llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp (+73)
- (modified) llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h (+1)
- (modified) llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll (+4-8)
- (modified) llvm/test/CodeGen/LoongArch/imm.ll (+9-16)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll (+4-6)
- (modified) llvm/test/CodeGen/LoongArch/merge-base-offset.ll (+2-4)
- (modified) llvm/test/MC/LoongArch/Macros/macros-li.s (+1-2)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index c2ae4a0734b6a7..b8f1cdfd2cb354 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -1291,14 +1291,32 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc,
Imm = SignExtend64<32>(Imm);
for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) {
- unsigned Opc = Inst.Opc;
- if (Opc == LoongArch::LU12I_W)
- Out.emitInstruction(MCInstBuilder(Opc).addReg(DestReg).addImm(Inst.Imm),
- getSTI());
- else
+ switch (Inst.Opc) {
+ case LoongArch::LU12I_W:
Out.emitInstruction(
- MCInstBuilder(Opc).addReg(DestReg).addReg(SrcReg).addImm(Inst.Imm),
+ MCInstBuilder(Inst.Opc).addReg(DestReg).addImm(Inst.Imm), getSTI());
+ break;
+ case LoongArch::ADDI_W:
+ case LoongArch::ORI:
+ case LoongArch::LU32I_D:
+ case LoongArch::LU52I_D:
+ Out.emitInstruction(
+ MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
+ Inst.Imm),
getSTI());
+ break;
+ case LoongArch::BSTRINS_D:
+ Out.emitInstruction(MCInstBuilder(Inst.Opc)
+ .addReg(DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .addImm(Inst.Imm >> 32)
+ .addImm(Inst.Imm & 0xFF),
+ getSTI());
+ break;
+ default:
+ llvm_unreachable("unexpected opcode generated by LoongArchMatInt");
+ }
SrcReg = DestReg;
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index b6ade6b978d2ce..70ed1e6fbdbdac 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -62,10 +62,26 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
// The instructions in the sequence are handled here.
for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) {
SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT);
- if (Inst.Opc == LoongArch::LU12I_W)
- Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm);
- else
+ switch (Inst.Opc) {
+ case LoongArch::LU12I_W:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SDImm);
+ break;
+ case LoongArch::ADDI_W:
+ case LoongArch::ORI:
+ case LoongArch::LU32I_D:
+ case LoongArch::LU52I_D:
Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm);
+ break;
+ case LoongArch::BSTRINS_D:
+ Result = CurDAG->getMachineNode(
+ Inst.Opc, DL, GRLenVT,
+ {SrcReg, SrcReg,
+ CurDAG->getTargetConstant(Inst.Imm >> 32, DL, GRLenVT),
+ CurDAG->getTargetConstant(Inst.Imm & 0xFF, DL, GRLenVT)});
+ break;
+ default:
+ llvm_unreachable("unexpected opcode generated by LoongArchMatInt");
+ }
SrcReg = SDValue(Result, 0);
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 9059da460f1358..d1af65192ee612 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -210,6 +210,14 @@ void LoongArchInstrInfo::movImm(MachineBasicBlock &MBB,
.addImm(Inst.Imm)
.setMIFlag(Flag);
break;
+ case LoongArch::BSTRINS_D:
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(Inst.Imm >> 32)
+ .addImm(Inst.Imm & 0xFF)
+ .setMIFlag(Flag);
+ break;
default:
assert(false && "Unknown insn emitted by LoongArchMatInt");
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp
index 1509c436c81098..de5f6ea9aba04c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp
@@ -26,11 +26,13 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
const int64_t Lo12 = Val & 0xFFF;
InstSeq Insts;
+ // Use a single LU52I_D when only Bits[63:52] are set (Bits[51:0] == 0).
if (Highest12 != 0 && SignExtend64<52>(Val) == 0) {
Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
return Insts;
}
+ // lo32
if (Hi20 == 0)
Insts.push_back(Inst(LoongArch::ORI, Lo12));
else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20))
@@ -41,11 +43,82 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
Insts.push_back(Inst(LoongArch::ORI, Lo12));
}
+ // hi32
+ // Higher20
if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20))
Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20)));
+ // Highest12
if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12))
Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
+ size_t N = Insts.size();
+ if (N < 3)
+ return Insts;
+
+ // When the sequence has more than 2 instructions, it may be possible to
+ // shorten it using the BSTRINS_D instruction. The candidate scenarios are as
+ // follows:
+ //
+ // N of Insts = 3
+ // 1. ORI + LU32I_D + LU52I_D => ORI + BSTRINS_D, TmpVal = ORI
+ // 2. ADDI_W + LU32I_D + LU52I_D => ADDI_W + BSTRINS_D, TmpVal = ADDI_W
+ // 3. LU12I_W + ORI + LU32I_D => ORI + BSTRINS_D, TmpVal = ORI
+ // 4. LU12I_W + LU32I_D + LU52I_D => LU12I_W + BSTRINS_D, TmpVal = LU12I_W
+ //
+ // N of Insts = 4
+ // 5. LU12I_W + ORI + LU32I_D + LU52I_D => LU12I_W + ORI + BSTRINS_D
+ // => ORI + LU52I_D + BSTRINS_D
+ // TmpVal = (LU12I_W | ORI) or (ORI | LU52I_D)
+ // The BSTRINS_D instruction will use the `TmpVal` to construct the `Val`.
+ uint64_t TmpVal1 = 0;
+ uint64_t TmpVal2 = 0;
+ switch (Insts[0].Opc) {
+ default:
+ llvm_unreachable("unexpected opcode");
+ break;
+ case LoongArch::LU12I_W:
+ if (Insts[1].Opc == LoongArch::ORI) {
+ TmpVal1 = Insts[1].Imm;
+ if (N == 3)
+ break;
+ TmpVal2 = Insts[3].Imm << 52 | TmpVal1;
+ }
+ TmpVal1 |= Insts[0].Imm << 12;
+ break;
+ case LoongArch::ORI:
+ case LoongArch::ADDI_W:
+ TmpVal1 = Insts[0].Imm;
+ break;
+ }
+
+ for (uint64_t Msb = 32; Msb < 64; ++Msb) {
+ uint64_t HighMask = ~((1ULL << (Msb + 1)) - 1);
+ for (uint64_t Lsb = Msb; Lsb > 0; --Lsb) {
+ uint64_t LowMask = (1ULL << Lsb) - 1;
+ uint64_t Mask = HighMask | LowMask;
+ uint64_t LsbToZero = TmpVal1 & ((1UL << (Msb - Lsb + 1)) - 1);
+ uint64_t MsbToLsb = LsbToZero << Lsb;
+ if ((MsbToLsb | (TmpVal1 & Mask)) == (uint64_t)Val) {
+ if (Insts[1].Opc == LoongArch::ORI && N == 3)
+ Insts[0] = Insts[1];
+ Insts.pop_back_n(2);
+ Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb));
+ return Insts;
+ }
+ if (TmpVal2 != 0) {
+ LsbToZero = TmpVal2 & ((1UL << (Msb - Lsb + 1)) - 1);
+ MsbToLsb = LsbToZero << Lsb;
+ if ((MsbToLsb | (TmpVal2 & Mask)) == (uint64_t)Val) {
+ Insts[0] = Insts[1];
+ Insts[1] = Insts[3];
+ Insts.pop_back_n(2);
+ Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb));
+ return Insts;
+ }
+ }
+ }
+ }
+
return Insts;
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
index be1b425894de1a..3a3c12c353fb8e 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
@@ -16,6 +16,7 @@ namespace llvm {
namespace LoongArchMatInt {
struct Inst {
unsigned Opc;
+ // Imm: Opc's immediate operand. For BSTRINS_D, Imm = (MSB << 32) | LSB.
int64_t Imm;
Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
};
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index f17cec231f3236..3efdd08bbea4c4 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -338,14 +338,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; LA64-NEXT: srli.d $a1, $a0, 1
; LA64-NEXT: lu12i.w $a2, 349525
; LA64-NEXT: ori $a2, $a2, 1365
-; LA64-NEXT: lu32i.d $a2, 349525
-; LA64-NEXT: lu52i.d $a2, $a2, 1365
+; LA64-NEXT: bstrins.d $a2, $a2, 62, 32
; LA64-NEXT: and $a1, $a1, $a2
; LA64-NEXT: sub.d $a0, $a0, $a1
; LA64-NEXT: lu12i.w $a1, 209715
; LA64-NEXT: ori $a1, $a1, 819
-; LA64-NEXT: lu32i.d $a1, 209715
-; LA64-NEXT: lu52i.d $a1, $a1, 819
+; LA64-NEXT: bstrins.d $a1, $a1, 61, 32
; LA64-NEXT: and $a2, $a0, $a1
; LA64-NEXT: srli.d $a0, $a0, 2
; LA64-NEXT: and $a0, $a0, $a1
@@ -354,13 +352,11 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; LA64-NEXT: add.d $a0, $a0, $a1
; LA64-NEXT: lu12i.w $a1, 61680
; LA64-NEXT: ori $a1, $a1, 3855
-; LA64-NEXT: lu32i.d $a1, -61681
-; LA64-NEXT: lu52i.d $a1, $a1, 240
+; LA64-NEXT: bstrins.d $a1, $a1, 59, 32
; LA64-NEXT: and $a0, $a0, $a1
; LA64-NEXT: lu12i.w $a1, 4112
; LA64-NEXT: ori $a1, $a1, 257
-; LA64-NEXT: lu32i.d $a1, 65793
-; LA64-NEXT: lu52i.d $a1, $a1, 16
+; LA64-NEXT: bstrins.d $a1, $a1, 56, 32
; LA64-NEXT: mul.d $a0, $a0, $a1
; LA64-NEXT: srli.d $a0, $a0, 56
; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/imm.ll b/llvm/test/CodeGen/LoongArch/imm.ll
index 746306bacc8d57..aca508e99fb960 100644
--- a/llvm/test/CodeGen/LoongArch/imm.ll
+++ b/llvm/test/CodeGen/LoongArch/imm.ll
@@ -47,8 +47,7 @@ define i64 @imm0008000000000fff() {
; CHECK-LABEL: imm0008000000000fff:
; CHECK: # %bb.0:
; CHECK-NEXT: ori $a0, $zero, 4095
-; CHECK-NEXT: lu32i.d $a0, -524288
-; CHECK-NEXT: lu52i.d $a0, $a0, 0
+; CHECK-NEXT: bstrins.d $a0, $a0, 51, 51
; CHECK-NEXT: ret
ret i64 2251799813689343
}
@@ -168,9 +167,8 @@ define i64 @imm0008000080000800() {
define i64 @imm14000000a() {
; CHECK-LABEL: imm14000000a:
; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, 262144
-; CHECK-NEXT: ori $a0, $a0, 10
-; CHECK-NEXT: lu32i.d $a0, 1
+; CHECK-NEXT: ori $a0, $zero, 10
+; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
; CHECK-NEXT: ret
ret i64 5368709130
}
@@ -179,8 +177,7 @@ define i64 @imm0fff000000000fff() {
; CHECK-LABEL: imm0fff000000000fff:
; CHECK: # %bb.0:
; CHECK-NEXT: ori $a0, $zero, 4095
-; CHECK-NEXT: lu32i.d $a0, -65536
-; CHECK-NEXT: lu52i.d $a0, $a0, 255
+; CHECK-NEXT: bstrins.d $a0, $a0, 59, 48
; CHECK-NEXT: ret
ret i64 1152640029630140415
}
@@ -189,8 +186,7 @@ define i64 @immffecffffffffffec() {
; CHECK-LABEL: immffecffffffffffec:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.w $a0, $zero, -20
-; CHECK-NEXT: lu32i.d $a0, -196609
-; CHECK-NEXT: lu52i.d $a0, $a0, -2
+; CHECK-NEXT: bstrins.d $a0, $a0, 52, 48
; CHECK-NEXT: ret
ret i64 -5348024557502484
}
@@ -199,8 +195,7 @@ define i64 @imm1c000000700000() {
; CHECK-LABEL: imm1c000000700000:
; CHECK: # %bb.0:
; CHECK-NEXT: lu12i.w $a0, 1792
-; CHECK-NEXT: lu32i.d $a0, -262144
-; CHECK-NEXT: lu52i.d $a0, $a0, 1
+; CHECK-NEXT: bstrins.d $a0, $a0, 52, 30
; CHECK-NEXT: ret
ret i64 7881299355238400
}
@@ -210,8 +205,7 @@ define i64 @immf0f0f0f0f0f0f0f0() {
; CHECK: # %bb.0:
; CHECK-NEXT: lu12i.w $a0, -61681
; CHECK-NEXT: ori $a0, $a0, 240
-; CHECK-NEXT: lu32i.d $a0, 61680
-; CHECK-NEXT: lu52i.d $a0, $a0, -241
+; CHECK-NEXT: bstrins.d $a0, $a0, 59, 32
; CHECK-NEXT: ret
ret i64 -1085102592571150096
}
@@ -219,10 +213,9 @@ define i64 @immf0f0f0f0f0f0f0f0() {
define i64 @imm110000014000000a() {
; CHECK-LABEL: imm110000014000000a:
; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, 262144
-; CHECK-NEXT: ori $a0, $a0, 10
-; CHECK-NEXT: lu32i.d $a0, 1
+; CHECK-NEXT: ori $a0, $zero, 10
; CHECK-NEXT: lu52i.d $a0, $a0, 272
+; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
; CHECK-NEXT: ret
ret i64 1224979104013484042
}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
index 772ae8d81a88bf..9654542f877459 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
@@ -973,9 +973,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
; LA64NOPIC-LABEL: ld_sd_constant:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: lu12i.w $a1, -136485
-; LA64NOPIC-NEXT: ori $a1, $a1, 3823
-; LA64NOPIC-NEXT: lu32i.d $a1, -147729
-; LA64NOPIC-NEXT: lu52i.d $a2, $a1, -534
+; LA64NOPIC-NEXT: ori $a2, $a1, 3823
+; LA64NOPIC-NEXT: bstrins.d $a2, $a2, 61, 32
; LA64NOPIC-NEXT: ld.d $a1, $a2, 0
; LA64NOPIC-NEXT: st.d $a0, $a2, 0
; LA64NOPIC-NEXT: move $a0, $a1
@@ -984,9 +983,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
; LA64PIC-LABEL: ld_sd_constant:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: lu12i.w $a1, -136485
-; LA64PIC-NEXT: ori $a1, $a1, 3823
-; LA64PIC-NEXT: lu32i.d $a1, -147729
-; LA64PIC-NEXT: lu52i.d $a2, $a1, -534
+; LA64PIC-NEXT: ori $a2, $a1, 3823
+; LA64PIC-NEXT: bstrins.d $a2, $a2, 61, 32
; LA64PIC-NEXT: ld.d $a1, $a2, 0
; LA64PIC-NEXT: st.d $a0, $a2, 0
; LA64PIC-NEXT: move $a0, $a1
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index 1e7a79beb62c61..323858c7613a67 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -1128,8 +1128,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_a64)
; LA64-NEXT: lu12i.w $a1, 279556
; LA64-NEXT: ori $a1, $a1, 1088
-; LA64-NEXT: lu32i.d $a1, 17472
-; LA64-NEXT: lu52i.d $a1, $a1, 1092
+; LA64-NEXT: bstrins.d $a1, $a1, 62, 32
; LA64-NEXT: add.d $a0, $a0, $a1
; LA64-NEXT: ret
;
@@ -1142,8 +1141,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
; LA64-LARGE-NEXT: lu12i.w $a1, 279556
; LA64-LARGE-NEXT: ori $a1, $a1, 1088
-; LA64-LARGE-NEXT: lu32i.d $a1, 17472
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
+; LA64-LARGE-NEXT: bstrins.d $a1, $a1, 62, 32
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
diff --git a/llvm/test/MC/LoongArch/Macros/macros-li.s b/llvm/test/MC/LoongArch/Macros/macros-li.s
index 994aa439effa1b..8ac82a766f6043 100644
--- a/llvm/test/MC/LoongArch/Macros/macros-li.s
+++ b/llvm/test/MC/LoongArch/Macros/macros-li.s
@@ -45,8 +45,7 @@ li.d $a0, 0x7ffff00000800
li.d $a0, 0x8000000000fff
# CHECK: ori $a0, $zero, 4095
-# CHECK-NEXT: lu32i.d $a0, -524288
-# CHECK-NEXT: lu52i.d $a0, $a0, 0
+# CHECK-NEXT: bstrins.d $a0, $a0, 51, 51
li.d $a0, 0x8000080000800
# CHECK: lu12i.w $a0, -524288
``````````
</details>
https://github.com/llvm/llvm-project/pull/106332
More information about the llvm-branch-commits
mailing list