[llvm] [BOLT] Optimize the codegen of createLoadImmediate for AArch64. (PR #137413)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 25 15:59:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Rodrigo Rocha (rcorcs)
<details>
<summary>Changes</summary>
The code generation of createLoadImmediate for AArch64 always emitted 4 instructions, regardless of the immediate value being loaded into the 64-bit register. This patch ensures that only the necessary number of instructions is emitted, depending on the value of the immediate: a MOVZ for the first non-zero 16-bit chunk, followed by a MOVK for each remaining non-zero chunk (ranging from 1 to 4 instructions for a non-zero immediate).
The added unit tests verify this new behavior.
---
Full diff: https://github.com/llvm/llvm-project/pull/137413.diff
2 Files Affected:
- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+20-8)
- (modified) bolt/unittests/Core/MCPlusBuilder.cpp (+84)
``````````diff
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index e00d6a18b0f6c..0aa9504f50a15 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2173,14 +2173,26 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
InstructionListType createLoadImmediate(const MCPhysReg Dest,
uint64_t Imm) const override {
- InstructionListType Insts(4);
- int Shift = 48;
- for (int I = 0; I < 4; I++, Shift -= 16) {
- Insts[I].setOpcode(AArch64::MOVKXi);
- Insts[I].addOperand(MCOperand::createReg(Dest));
- Insts[I].addOperand(MCOperand::createReg(Dest));
- Insts[I].addOperand(MCOperand::createImm((Imm >> Shift) & 0xFFFF));
- Insts[I].addOperand(MCOperand::createImm(Shift));
+ InstructionListType Insts;
+ for (int I = 0, Shift = 0; I < 4; I++, Shift += 16) {
+ uint16_t HalfWord = (Imm >> Shift) & 0xFFFF;
+ if (!HalfWord)
+ continue;
+ MCInst Inst;
+ if (Insts.size() == 0) {
+ Inst.setOpcode(AArch64::MOVZXi);
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createImm(HalfWord));
+ Inst.addOperand(MCOperand::createImm(Shift));
+ Insts.push_back(Inst);
+ } else {
+ Inst.setOpcode(AArch64::MOVKXi);
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createImm(HalfWord));
+ Inst.addOperand(MCOperand::createImm(Shift));
+ Insts.push_back(Inst);
+ }
}
return Insts;
}
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 7016dec0e3574..ac0529cb09a7b 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -167,6 +167,90 @@ TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
ASSERT_EQ(Label, BB->getLabel());
}
+TEST_P(MCPlusBuilderTester, AArch64_LoadImm32) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+ BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+ std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+
+ InstructionListType Instrs = BC->MIB->createLoadImmediate(AArch64::X0, 2);
+ BB->addInstructions(Instrs.begin(), Instrs.end());
+
+ ASSERT_EQ(BB->size(), 1);
+ auto II = BB->begin();
+ // mov x0, #2
+ ASSERT_EQ(II->getOpcode(), AArch64::MOVZXi);
+ ASSERT_EQ(II->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(1).getImm(), 2);
+ ASSERT_EQ(II->getOperand(2).getImm(), 0);
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_LoadImm64) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+ BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+ std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+
+ int64_t Imm = ((uint64_t)4) << 48 | ((uint64_t)3) << 32 | 2 << 16 | 1;
+ InstructionListType Instrs = BC->MIB->createLoadImmediate(AArch64::X0, Imm);
+ BB->addInstructions(Instrs.begin(), Instrs.end());
+
+ ASSERT_EQ(BB->size(), 4);
+ auto II = BB->begin();
+ // mov x0, #1
+ ASSERT_EQ(II->getOpcode(), AArch64::MOVZXi);
+ ASSERT_EQ(II->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(1).getImm(), 1);
+ ASSERT_EQ(II->getOperand(2).getImm(), 0);
+ II++;
+ // movk x0, #2, lsl #16
+ ASSERT_EQ(II->getOpcode(), AArch64::MOVKXi);
+ ASSERT_EQ(II->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(1).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(2).getImm(), 2);
+ ASSERT_EQ(II->getOperand(3).getImm(), 16);
+ II++;
+ // movk x0, #3, lsl #32
+ ASSERT_EQ(II->getOpcode(), AArch64::MOVKXi);
+ ASSERT_EQ(II->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(1).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(2).getImm(), 3);
+ ASSERT_EQ(II->getOperand(3).getImm(), 32);
+ II++;
+ // movk x0, #4, lsl #48
+ ASSERT_EQ(II->getOpcode(), AArch64::MOVKXi);
+ ASSERT_EQ(II->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(1).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(2).getImm(), 4);
+ ASSERT_EQ(II->getOperand(3).getImm(), 48);
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_LoadImm64Partial) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+ BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+ std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+
+ int64_t Imm = ((uint64_t)4) << 48 | 2 << 16;
+ InstructionListType Instrs = BC->MIB->createLoadImmediate(AArch64::X0, Imm);
+ BB->addInstructions(Instrs.begin(), Instrs.end());
+
+ ASSERT_EQ(BB->size(), 2);
+ auto II = BB->begin();
+ // mov x0, #2, lsl #16
+ ASSERT_EQ(II->getOpcode(), AArch64::MOVZXi);
+ ASSERT_EQ(II->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(1).getImm(), 2);
+ ASSERT_EQ(II->getOperand(2).getImm(), 16);
+ II++;
+ // movk x0, #4, lsl #48
+ ASSERT_EQ(II->getOpcode(), AArch64::MOVKXi);
+ ASSERT_EQ(II->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(1).getReg(), AArch64::X0);
+ ASSERT_EQ(II->getOperand(2).getImm(), 4);
+ ASSERT_EQ(II->getOperand(3).getImm(), 48);
+}
+
TEST_P(MCPlusBuilderTester, testAccessedRegsImplicitDef) {
if (GetParam() != Triple::aarch64)
GTEST_SKIP();
``````````
</details>
https://github.com/llvm/llvm-project/pull/137413
More information about the llvm-commits
mailing list