[llvm-branch-commits] [llvm] [LoongArch] Optimize for immediate value materialization using BSTRINS_D instruction (PR #106332)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 28 01:54:47 PDT 2024
================
@@ -41,11 +43,82 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
Insts.push_back(Inst(LoongArch::ORI, Lo12));
}
+ // hi32
+ // Higher20
if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20))
Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20)));
+ // Highest12
if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12))
Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
+ size_t N = Insts.size();
+ if (N < 3)
+ return Insts;
+
+ // When the sequence contains more than 2 instructions, we have the
+ // opportunity to optimize using the BSTRINS_D instruction. The scenarios are
+ // as follows:
+ //
+ // N of Insts = 3
+ // 1. ORI + LU32I_D + LU52I_D => ORI + BSTRINS_D, TmpVal = ORI
+ // 2. ADDI_W + LU32I_D + LU52I_D => ADDI_W + BSTRINS_D, TmpVal = ADDI_W
+ // 3. LU12I_W + ORI + LU32I_D => ORI + BSTRINS_D, TmpVal = ORI
+ // 4. LU12I_W + LU32I_D + LU52I_D => LU12I_W + BSTRINS_D, TmpVal = LU12I_W
+ //
+ // N of Insts = 4
+ // 5. LU12I_W + ORI + LU32I_D + LU52I_D => LU12I_W + ORI + BSTRINS_D
+ // => ORI + LU52I_D + BSTRINS_D
+ // TmpVal = (LU12I_W | ORI) or (ORI | LU52I_D)
+ // The BSTRINS_D instruction will use the `TmpVal` to construct the `Val`.
+ uint64_t TmpVal1 = 0;
+ uint64_t TmpVal2 = 0;
+ switch (Insts[0].Opc) {
+ default:
+ llvm_unreachable("unexpected opcode");
+ break;
+ case LoongArch::LU12I_W:
+ if (Insts[1].Opc == LoongArch::ORI) {
+ TmpVal1 = Insts[1].Imm;
+ if (N == 3)
+ break;
+ TmpVal2 = Insts[3].Imm << 52 | TmpVal1;
+ }
+ TmpVal1 |= Insts[0].Imm << 12;
+ break;
+ case LoongArch::ORI:
+ case LoongArch::ADDI_W:
+ TmpVal1 = Insts[0].Imm;
+ break;
+ }
+
+ for (uint64_t Msb = 32; Msb < 64; ++Msb) {
+ uint64_t HighMask = ~((1ULL << (Msb + 1)) - 1);
+ for (uint64_t Lsb = Msb; Lsb > 0; --Lsb) {
----------------
heiher wrote:
It appears the maximum number of iterations may be up to `∑_{i=32}^{63} i` (= 1520), since the inner loop runs `Msb` times for each `Msb` from 32 to 63. Could we reduce the complexity?
https://github.com/llvm/llvm-project/pull/106332
More information about the llvm-branch-commits
mailing list