[llvm] [AArch64] Use mov imm pseudo instructions in madd combine. (PR #147510)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 05:31:02 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/147510
The usual path for lowering immediates in AArch64 is to generate a MOVi32imm or MOVi64imm pseudo instruction, which can be moved or rematerialized as required and is expanded into one or more instructions after register allocation.
The MachineCombiner code was generating MOVN/ORR/MOVZ instructions directly. This patch converts it to use the pseudos instead, allowing the generated immediates to be rematerialized if required. The code is hopefully simpler as a result, and the Sub and Add patterns have been combined to reduce duplication.
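As a quick illustration (my own sketch, not part of the patch; register assignments and exact operand order may vary by subtarget), the combine applies to IR such as:

  define i32 @mul_add_imm(i32 %a, i32 %b) {
    %m = mul nsw i32 %a, %b
    %r = add nsw i32 %m, 79
    ret i32 %r
  }

With this patch the constant is now materialized through the pseudo, roughly:

  %c:gpr32 = MOVi32imm 79
  %r:gpr32common = MADDWrrr %b, %a, %c

which is then expanded after register allocation into something like "mov w8, #79" followed by "madd w0, w1, w0, w8".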
From ec945d904a22c4b2002adfbc0be3b789fb8453a7 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 8 Jul 2025 13:03:55 +0100
Subject: [PATCH] [AArch64] Use mov imm pseudo instructions in madd combine.
The usual path for lowering immediates in AArch64 is to generate a MOVi32imm
or MOVi64imm pseudo instruction, which can be moved or rematerialized as
required and is expanded into one or more instructions after register
allocation.
The MachineCombiner code was generating MOVN/ORR/MOVZ instructions directly.
This patch converts it to use the pseudos instead, allowing the generated
immediates to be rematerialized if required. The code is hopefully simpler as
a result, and the Sub and Add patterns have been combined to reduce
duplication.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 116 +++---------------
.../AArch64/machine-combiner-maddimm.mir | 24 ++--
llvm/test/CodeGen/AArch64/madd-combiner.ll | 3 +-
3 files changed, 33 insertions(+), 110 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8847c62690714..0c60bcb382e2e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7868,62 +7868,47 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDWI_OP1:
- case AArch64MachineCombinerPattern::MULADDXI_OP1: {
+ case AArch64MachineCombinerPattern::MULADDXI_OP1:
+ case AArch64MachineCombinerPattern::MULSUBWI_OP1:
+ case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
- // ADD R,I,Imm
- // ==> MOV V, Imm
+ // ADD/SUB R,I,Imm
+ // ==> MOV V, Imm/-Imm
// ==> MADD R,A,B,V
// --- Create(MADD);
- const TargetRegisterClass *OrrRC;
- unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
- OrrOpc = AArch64::ORRWri;
- OrrRC = &AArch64::GPR32spRegClass;
+ const TargetRegisterClass *RC;
+ unsigned BitSize, MovImm;
+ if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 ||
+ Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
+ MovImm = AArch64::MOVi32imm;
+ RC = &AArch64::GPR32spRegClass;
BitSize = 32;
- ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
- OrrOpc = AArch64::ORRXri;
- OrrRC = &AArch64::GPR64spRegClass;
+ MovImm = AArch64::MOVi64imm;
+ RC = &AArch64::GPR64spRegClass;
BitSize = 64;
- ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
- Register NewVR = MRI.createVirtualRegister(OrrRC);
+ Register NewVR = MRI.createVirtualRegister(RC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
- uint64_t UImm = SignExtend64(Imm, BitSize);
- // The immediate can be composed via a single instruction.
+ bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
+ Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1;
+ uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
+ // Check that the immediate can be composed via a single instruction.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
if (Insn.size() != 1)
return;
- auto MovI = Insn.begin();
- MachineInstrBuilder MIB1;
- // MOV is an alias for one of three instructions: movz, movn, and orr.
- if (MovI->Opcode == OrrOpc)
- MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(MovI->Op2);
- else {
- if (BitSize == 32)
- assert((MovI->Opcode == AArch64::MOVNWi ||
- MovI->Opcode == AArch64::MOVZWi) &&
- "Expected opcode");
- else
- assert((MovI->Opcode == AArch64::MOVNXi ||
- MovI->Opcode == AArch64::MOVZXi) &&
- "Expected opcode");
- MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
- .addImm(MovI->Op1)
- .addImm(MovI->Op2);
- }
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR).addImm(IsSub ? -Imm : Imm);
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -7977,67 +7962,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case AArch64MachineCombinerPattern::MULSUBWI_OP1:
- case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
- // MUL I=A,B,0
- // SUB R,I, Imm
- // ==> MOV V, -Imm
- // ==> MADD R,A,B,V // = -Imm + A*B
- // --- Create(MADD);
- const TargetRegisterClass *OrrRC;
- unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
- OrrOpc = AArch64::ORRWri;
- OrrRC = &AArch64::GPR32spRegClass;
- BitSize = 32;
- ZeroReg = AArch64::WZR;
- Opc = AArch64::MADDWrrr;
- RC = &AArch64::GPR32RegClass;
- } else {
- OrrOpc = AArch64::ORRXri;
- OrrRC = &AArch64::GPR64spRegClass;
- BitSize = 64;
- ZeroReg = AArch64::XZR;
- Opc = AArch64::MADDXrrr;
- RC = &AArch64::GPR64RegClass;
- }
- Register NewVR = MRI.createVirtualRegister(OrrRC);
- uint64_t Imm = Root.getOperand(2).getImm();
- if (Root.getOperand(3).isImm()) {
- unsigned Val = Root.getOperand(3).getImm();
- Imm = Imm << Val;
- }
- uint64_t UImm = SignExtend64(-Imm, BitSize);
- // The immediate can be composed via a single instruction.
- SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
- AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
- if (Insn.size() != 1)
- return;
- auto MovI = Insn.begin();
- MachineInstrBuilder MIB1;
- // MOV is an alias for one of three instructions: movz, movn, and orr.
- if (MovI->Opcode == OrrOpc)
- MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(MovI->Op2);
- else {
- if (BitSize == 32)
- assert((MovI->Opcode == AArch64::MOVNWi ||
- MovI->Opcode == AArch64::MOVZWi) &&
- "Expected opcode");
- else
- assert((MovI->Opcode == AArch64::MOVNXi ||
- MovI->Opcode == AArch64::MOVZXi) &&
- "Expected opcode");
- MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
- .addImm(MovI->Op1)
- .addImm(MovI->Op2);
- }
- InsInstrs.push_back(MIB1);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
- break;
- }
case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir b/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir
index dc75c8c61c53c..c944889ede695 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir
@@ -14,8 +14,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK-NEXT: [[MOVZWi:%[0-9]+]]:gpr32common = nsw MOVZWi 79, 0
- ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVZWi]]
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 79
+ ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr32 = COPY $w0
@@ -38,8 +38,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK-NEXT: [[MOVZXi:%[0-9]+]]:gpr64common = nsw MOVZXi 79, 0
- ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVZXi]]
+ ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 79
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
@@ -62,8 +62,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK-NEXT: [[MOVNWi:%[0-9]+]]:gpr32common = nsw MOVNWi 0, 0
- ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVNWi]]
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm -1
+ ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr32 = COPY $w0
@@ -86,8 +86,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK-NEXT: [[MOVNXi:%[0-9]+]]:gpr64common = nsw MOVNXi 0, 0
- ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVNXi]]
+ ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm -1
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
@@ -110,8 +110,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK-NEXT: [[ORRWri:%[0-9]+]]:gpr32common = nsw ORRWri $wzr, 1291
- ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[ORRWri]]
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 16773120
+ ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]]
; CHECK-NEXT: $w0 = COPY [[MADDWrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr32 = COPY $w0
@@ -134,8 +134,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK-NEXT: [[ORRXri:%[0-9]+]]:gpr64common = nsw ORRXri $xzr, 7435
- ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[ORRXri]]
+ ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 16773120
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]]
; CHECK-NEXT: $x0 = COPY [[MADDXrrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll
index 6e510712fbd21..cc7fc8fc98629 100644
--- a/llvm/test/CodeGen/AArch64/madd-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll
@@ -39,9 +39,8 @@ define void @mul_add_imm2() {
; CHECK-FAST-LABEL: mul_add_imm2:
; CHECK-FAST: ; %bb.0: ; %entry
; CHECK-FAST-NEXT: mov x8, #-3 ; =0xfffffffffffffffd
-; CHECK-FAST-NEXT: mov x9, #-3 ; =0xfffffffffffffffd
-; CHECK-FAST-NEXT: madd x8, x8, x8, x9
; CHECK-FAST-NEXT: mov x9, #45968 ; =0xb390
+; CHECK-FAST-NEXT: madd x8, x8, x8, x8
; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
; CHECK-FAST-NEXT: movk x9, #323, lsl #32
; CHECK-FAST-NEXT: LBB2_1: ; %for.body8
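To check the new output locally, the MIR test above can be rerun with an invocation along these lines (the exact RUN line in the test may differ):

  llc -mtriple=aarch64 -run-pass=machine-combiner \
      llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir -o -

and llvm/test/CodeGen/AArch64/madd-combiner.ll goes through the usual llc + FileCheck path.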