[llvm] 75358f0 - [AArch64] Lower multiplication by a constant int to madd
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 7 04:35:12 PDT 2022
Author: zhongyunde
Date: 2022-10-07T19:33:47+08:00
New Revision: 75358f060c099c9a290cea43eb15f872ab7f2343
URL: https://github.com/llvm/llvm-project/commit/75358f060c099c9a290cea43eb15f872ab7f2343
DIFF: https://github.com/llvm/llvm-project/commit/75358f060c099c9a290cea43eb15f872ab7f2343.diff
LOG: [AArch64] Lower multiplication by a constant int to madd
Lower a = b * C - 1 into madd:
a) instcombine rewrites b * C - 1 as b * C + (-1)
b) the machine combiner turns b * C + (-1) into madd
The assembler will transform the negative immediate of a sub into an add, see https://gcc.godbolt.org/z/cTcxePPf4
Fixes the AArch64 part of https://github.com/llvm/llvm-project/issues/57255.
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D134336
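As a minimal end-to-end sketch (taken from the new mull6_sub test that this
patch adds to llvm/test/CodeGen/AArch64/mul_pow2.ll, shown in the diff below),
the input IR looks like:

    define i32 @mull6_sub(i32 %x) {
      %mul = mul nsw i32 %x, 6
      %sub = add nsw i32 %mul, -1   ; instcombine has already turned "- 1" into "+ (-1)"
      ret i32 %sub
    }

and, per the new CHECK lines, AArch64 codegen now materializes the addend with
a mov and folds the add into madd:

    mov w8, #6
    mov w9, #-1
    madd w0, w0, w8, w9
    ret

instead of emitting the mul followed by a separate add/sub of the immediate.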
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/test/CodeGen/AArch64/addimm-mulimm.ll
llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
llvm/test/CodeGen/AArch64/madd-combiner.ll
llvm/test/CodeGen/AArch64/mul_pow2.ll
llvm/test/CodeGen/AArch64/srem-seteq.ll
llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 4167875dbeebb..3994c8200b6f7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5796,7 +5796,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
case MachineCombinerPattern::MULADDXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
- // ==> ORR V, ZR, Imm
+ // ==> MOV V, Imm
// ==> MADD R,A,B,V
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
@@ -5824,13 +5824,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(Imm, BitSize);
- uint64_t Encoding;
- if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ // Bail out unless the immediate can be materialized with a single instruction.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+ if (Insn.size() != 1)
return;
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
+ auto MovI = Insn.begin();
+ MachineInstrBuilder MIB1;
+ // MOV is an alias for one of three instructions: movz, movn, and orr.
+ if (MovI->Opcode == OrrOpc)
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(MovI->Op2);
+ else {
+ if (BitSize == 32)
+ assert((MovI->Opcode == AArch64::MOVNWi ||
+ MovI->Opcode == AArch64::MOVZWi) &&
+ "Expected opcode");
+ else
+ assert((MovI->Opcode == AArch64::MOVNXi ||
+ MovI->Opcode == AArch64::MOVZXi) &&
+ "Expected opcode");
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+ .addImm(MovI->Op1)
+ .addImm(MovI->Op2);
+ }
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -5888,7 +5906,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
case MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
- // ==> ORR V, ZR, -Imm
+ // ==> MOV V, -Imm
// ==> MADD R,A,B,V // = -Imm + A*B
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
@@ -5915,13 +5933,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
- uint64_t Encoding;
- if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ // Bail out unless the immediate can be materialized with a single instruction.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+ if (Insn.size() != 1)
return;
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
+ auto MovI = Insn.begin();
+ MachineInstrBuilder MIB1;
+ // MOV is an alias for one of three instructions: movz, movn, and orr.
+ if (MovI->Opcode == OrrOpc)
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(MovI->Op2);
+ else {
+ if (BitSize == 32)
+ assert((MovI->Opcode == AArch64::MOVNWi ||
+ MovI->Opcode == AArch64::MOVZWi) &&
+ "Expected opcode");
+ else
+ assert((MovI->Opcode == AArch64::MOVNXi ||
+ MovI->Opcode == AArch64::MOVZXi) &&
+ "Expected opcode");
+ MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+ .addImm(MovI->Op1)
+ .addImm(MovI->Op2);
+ }
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
diff --git a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
index ef17ff1463fb7..cc6523d1bb1d5 100644
--- a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
@@ -5,8 +5,8 @@ define i64 @addimm_mulimm_accept_00(i64 %a) {
; CHECK-LABEL: addimm_mulimm_accept_00:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: add x0, x8, #1147
+; CHECK-NEXT: mov x9, #1147
+; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
%tmp0 = add i64 %a, 31
%tmp1 = mul i64 %tmp0, 37
@@ -17,8 +17,8 @@ define i64 @addimm_mulimm_accept_01(i64 %a) {
; CHECK-LABEL: addimm_mulimm_accept_01:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: sub x0, x8, #1147
+; CHECK-NEXT: mov x9, #-1147
+; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
%tmp0 = add i64 %a, -31
%tmp1 = mul i64 %tmp0, 37
@@ -29,8 +29,8 @@ define signext i32 @addimm_mulimm_accept_02(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_accept_02:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: add w0, w8, #1147
+; CHECK-NEXT: mov w9, #1147
+; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
%tmp0 = add i32 %a, 31
%tmp1 = mul i32 %tmp0, 37
@@ -41,8 +41,8 @@ define signext i32 @addimm_mulimm_accept_03(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_accept_03:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w0, w8, #1147
+; CHECK-NEXT: mov w9, #-1147
+; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
%tmp0 = add i32 %a, -31
%tmp1 = mul i32 %tmp0, 37
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
index 2b03fa34453ee..b8520896fb682 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
@@ -13,7 +13,7 @@ define dso_local i32 @_Z5func1i(i32 %x) #0 {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: orr w8, wzr, #0x1
+; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: madd w19, w0, w0, w8
; CHECK-NEXT: mov w0, #4
; CHECK-NEXT: bl __cxa_allocate_exception
@@ -37,7 +37,7 @@ define dso_local i32 @_Z5func2c(i8 %x) #0 {
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: mov w0, #4
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: madd w19, w8, w8, w9
; CHECK-NEXT: bl __cxa_allocate_exception
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll
index 07fbcddb307e8..28e80b1f0fd38 100644
--- a/llvm/test/CodeGen/AArch64/madd-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll
@@ -6,7 +6,7 @@
define i32 @mul_add_imm(i32 %a, i32 %b) {
; CHECK-LABEL: mul_add_imm:
; CHECK: ; %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x4
+; CHECK-NEXT: mov w8, #4
; CHECK-NEXT: madd w0, w0, w1, w8
; CHECK-NEXT: ret
%1 = mul i32 %a, %b
@@ -39,7 +39,7 @@ define void @mul_add_imm2() {
; CHECK-FAST-LABEL: mul_add_imm2:
; CHECK-FAST: ; %bb.0: ; %entry
; CHECK-FAST-NEXT: mov x8, #-3
-; CHECK-FAST-NEXT: orr x9, xzr, #0xfffffffffffffffd
+; CHECK-FAST-NEXT: mov x9, #-3
; CHECK-FAST-NEXT: madd x8, x8, x8, x9
; CHECK-FAST-NEXT: mov x9, #45968
; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll
index 30c639abb8a7f..6ec0b62c02105 100644
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -290,6 +290,45 @@ define i64 @test6_smnegl(i32 %x) {
ret i64 %sub
}
+; We may hoist the "mov" instructions out of a loop
+define i32 @mull6_sub(i32 %x) {
+; CHECK-LABEL: mull6_sub:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: madd w0, w0, w8, w9
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: mull6_sub:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #6
+; GISEL-NEXT: mov w9, #-1
+; GISEL-NEXT: madd w0, w0, w8, w9
+; GISEL-NEXT: ret
+ %mul = mul nsw i32 %x, 6
+ %sub = add nsw i32 %mul, -1
+ ret i32 %sub
+}
+
+define i64 @mull6_sub_orr(i64 %x) {
+; CHECK-LABEL: mull6_sub_orr:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: mov x9, #16773120
+; CHECK-NEXT: madd x0, x0, x8, x9
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: mull6_sub_orr:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #6
+; GISEL-NEXT: mov x9, #16773120
+; GISEL-NEXT: madd x0, x0, x8, x9
+; GISEL-NEXT: ret
+ %mul = mul nsw i64 %x, 6
+ %sub = add nsw i64 %mul, 16773120
+ ret i64 %sub
+}
+
define i32 @test7(i32 %x) {
; CHECK-LABEL: test7:
; CHECK: // %bb.0:
@@ -731,11 +770,11 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
;
; GISEL-LABEL: muladd_demand_commute:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI42_1
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI42_1]
-; GISEL-NEXT: adrp x8, .LCPI42_0
+; GISEL-NEXT: adrp x8, .LCPI44_1
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI44_1]
+; GISEL-NEXT: adrp x8, .LCPI44_0
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
-; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI42_0]
+; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI44_0]
; GISEL-NEXT: and v0.16b, v1.16b, v0.16b
; GISEL-NEXT: ret
%m = mul <4 x i32> %x, <i32 131008, i32 131008, i32 131008, i32 131008>
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq.ll b/llvm/test/CodeGen/AArch64/srem-seteq.ll
index 5192de1a0e882..4bb29d344282f 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq.ll
@@ -47,7 +47,7 @@ define i32 @test_srem_odd_bit30(i32 %X) nounwind {
; CHECK-LABEL: test_srem_odd_bit30:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: movk w8, #27306, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, #3
@@ -64,7 +64,7 @@ define i32 @test_srem_odd_bit31(i32 %X) nounwind {
; CHECK-LABEL: test_srem_odd_bit31:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #21845
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: movk w8, #54613, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, #3
@@ -122,7 +122,7 @@ define i32 @test_srem_even_bit30(i32 %X) nounwind {
; CHECK-LABEL: test_srem_even_bit30:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #20165
-; CHECK-NEXT: orr w9, wzr, #0x8
+; CHECK-NEXT: mov w9, #8
; CHECK-NEXT: movk w8, #64748, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: ror w8, w8, #3
@@ -140,7 +140,7 @@ define i32 @test_srem_even_bit31(i32 %X) nounwind {
; CHECK-LABEL: test_srem_even_bit31:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1285
-; CHECK-NEXT: orr w9, wzr, #0x2
+; CHECK-NEXT: mov w9, #2
; CHECK-NEXT: movk w8, #50437, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: ror w8, w8, #1
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
index c8627b8df1c92..b3be59dd7b082 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
@@ -137,11 +137,11 @@ define i1 @t32_6_3(i32 %X) nounwind {
; CHECK-LABEL: t32_6_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
-; CHECK-NEXT: mov w9, #43691
+; CHECK-NEXT: mov w9, #-1
; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: madd w8, w0, w8, w9
+; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: movk w9, #10922, lsl #16
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w8, w8, #1
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
@@ -209,8 +209,8 @@ define i1 @t8_3_2(i8 %X) nounwind {
; CHECK-LABEL: t8_3_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-85
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w8, w8, #86
+; CHECK-NEXT: mov w9, #-86
+; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: cmp w8, #85
; CHECK-NEXT: cset w0, lo