[llvm] [AArch64] Do adc(s) machine instruction combine (PR #113663)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 02:27:59 PDT 2024
https://github.com/ClareTin created https://github.com/llvm/llvm-project/pull/113663
This optimization is AArch64-only: it identifies the `cinc i, a, cs` followed by `add(s) r, i, b` instruction pattern and transforms it into `adc(s) r, a, b`.
>From 4721f44511c50ee6c3ae35ce427b38f6710903f1 Mon Sep 17 00:00:00 2001
From: ClareTin <tianxiaoqi at huawei.com>
Date: Fri, 25 Oct 2024 17:22:23 +0800
Subject: [PATCH] [AArch64] Do adc(s) machine instruction combine This
optimization is just for aarch64; it will identify the cinc i,a,cs and add(s)
r, i, b instruction patterns and transform them to adc(s) r,a,b.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 113 ++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 +
.../CodeGen/AArch64/aarch64-adc-combine.mir | 247 ++++++++++++++++++
3 files changed, 364 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-adc-combine.mir
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 32bc0e7d0d6475..bbb30c14b8ee20 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7022,6 +7022,60 @@ AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
}
}
+
+/// Find ADD(S) instructions that can be turned into ADC(S):
+/// CINC I=A,CS ; ADD|S R,I,B or ADD|S R,B,I ==> ADC|S R,A,B
+/// CINC I=A,CS is an alias of CSINC I=A,A,LO, which is the instruction pattern
+/// that actually needs to be identified.
+/// NOTE(review): if A + carry wraps, ADCS yields different C/V flags than
+/// CINC+ADDS; confirm this is acceptable when NZCV of the ADDS is live.
+static bool getAdcPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ const TargetRegisterInfo *TRI) {
+ unsigned Opc = Root.getOpcode();
+ MachineBasicBlock &MBB = *Root.getParent();
+ bool Found = false;
+
+ auto setFound = [&](int Opcode, int Operand, unsigned Pattern) {
+ MachineOperand &MO = Root.getOperand(Operand);
+ if (canCombine(MBB, MO, Opcode)) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineInstr *MI = nullptr;
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
+ MI = MRI.getUniqueVRegDef(MO.getReg());
+ if (MI && (findCondCodeUsedByInstr(*MI) == AArch64CC::LO) &&
+ MI->getOperand(1).isIdenticalTo(MI->getOperand(2))) {
+ // Convert only when NZCV is not written between the CINC and the ADD(S);
+ // instructions in between that merely read the flags do not block it.
+ if (!areCFlagsAccessedBetweenInstrs(MI, Root, TRI, AK_Write)) {
+ Patterns.push_back(Pattern);
+ Found = true;
+ }
+ }
+ }
+ };
+
+ switch (Opc) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDSWrr:
+ setFound(AArch64::CSINCWr, 1, AArch64MachineCombinerPattern::CINCADD_OP1);
+ if (!Found)
+ setFound(AArch64::CSINCWr, 2, AArch64MachineCombinerPattern::CINCADD_OP2);
+ break;
+ case AArch64::ADDXrr:
+ case AArch64::ADDSXrr:
+ setFound(AArch64::CSINCXr, 1, AArch64MachineCombinerPattern::CINCADD_OP1);
+ if (!Found)
+ setFound(AArch64::CSINCXr, 2, AArch64MachineCombinerPattern::CINCADD_OP2);
+ break;
+ default:
+ break;
+ }
+
+ return Found;
+}
+
+
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
@@ -7033,6 +7087,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
// Integer patterns
if (getMaddPatterns(Root, Patterns))
return true;
+ if (getAdcPatterns(Root, Patterns, &getRegisterInfo()))
+ return true;
// Floating point patterns
if (getFMULPatterns(Root, Patterns))
return true;
@@ -7390,6 +7446,47 @@ genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
DelInstrs.push_back(&Root);
}
+/// Emit the ADC|S for \p Root, whose operand \p CombineOpdIdx is a CINC:
+/// CINC I=A,CS
+/// ADD|S R,I,B
+/// ==> ADC|S R,A,B (added to \p InsInstrs; CINC is added to \p DelInstrs)
+static void genAdc(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ unsigned CombineOpdIdx) {
+ assert(CombineOpdIdx == 1 || CombineOpdIdx == 2);
+
+ unsigned AnotherOpdIdx = CombineOpdIdx == 1 ? 2 : 1;
+ MachineInstr *CincMI =
+ MRI.getUniqueVRegDef(Root.getOperand(CombineOpdIdx).getReg());
+
+ Register ResultReg = Root.getOperand(0).getReg();
+ Register RegA = CincMI->getOperand(1).getReg();
+ unsigned RegAState = getRegState(CincMI->getOperand(1));
+ Register RegB = Root.getOperand(AnotherOpdIdx).getReg();
+ unsigned RegBState = getRegState(Root.getOperand(AnotherOpdIdx));
+
+ unsigned Opcode = Root.getOpcode();
+ unsigned OptOpcode = AArch64::ADCWr;
+ if (Opcode == AArch64::ADDXrr)
+ OptOpcode = AArch64::ADCXr;
+ else if (Opcode == AArch64::ADDSWrr)
+ OptOpcode = AArch64::ADCSWr;
+ else if (Opcode == AArch64::ADDSXrr)
+ OptOpcode = AArch64::ADCSXr;
+ else
+ assert(Opcode == AArch64::ADDWrr && "Unexpected instruction opcode.");
+
+ MachineInstrBuilder MIB =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OptOpcode), ResultReg)
+ .addReg(RegA, RegAState)
+ .addReg(RegB, RegBState);
+
+ InsInstrs.push_back(MIB);
+ DelInstrs.push_back(CincMI);
+}
+
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
@@ -7516,6 +7613,22 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
+ case AArch64MachineCombinerPattern::CINCADD_OP1: {
+ // CINC I=A,CS
+ // ADD|S R,I,B
+ // ==> ADC|S R,A,B
+ // --- Create ADC|S
+ genAdc(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1);
+ break;
+ }
+ case AArch64MachineCombinerPattern::CINCADD_OP2: {
+ // CINC I=A,CS
+ // ADD|S R,B,I
+ // ==> ADC|S R,A,B
+ // --- Create ADC|S
+ genAdc(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2);
+ break;
+ }
case AArch64MachineCombinerPattern::MULSUBW_OP1:
case AArch64MachineCombinerPattern::MULSUBX_OP1: {
// MUL I=A,B,0
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index e37f70f7d985de..e58f18fef8f0d6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -52,6 +52,10 @@ enum AArch64MachineCombinerPattern : unsigned {
MULSUBX_OP2,
MULADDXI_OP1,
MULSUBXI_OP1,
+
+ CINCADD_OP1,
+ CINCADD_OP2,
+
// NEON integers vectors
MULADDv8i8_OP1,
MULADDv8i8_OP2,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-adc-combine.mir b/llvm/test/CodeGen/AArch64/aarch64-adc-combine.mir
new file mode 100644
index 00000000000000..d4f0a873ce053e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-adc-combine.mir
@@ -0,0 +1,247 @@
+# REQUIRES: enable_bspriv_aarch64
+# RUN: llc -o - %s -mtriple=aarch64-unknown-unknown -run-pass=machine-combiner | FileCheck %s
+
+---
+name: _Z18adc_combine_64bitsmmRmS_S_
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: gpr64common }
+ - { id: 3, class: gpr64common }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: gpr64 }
+ - { id: 6, class: gpr64 }
+ - { id: 7, class: gpr64 }
+ - { id: 8, class: gpr64 }
+ - { id: 9, class: gpr64 }
+ - { id: 10, class: gpr64common }
+ - { id: 11, class: gpr64common }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+ - { reg: '$x2', virtual-reg: '%2' }
+ - { reg: '$x3', virtual-reg: '%3' }
+ - { reg: '$x4', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: _Z18adc_combine_64bitsmmRmS_S_
+ ; CHECK: bb.0.entry:
+ ; CHECK: ADCSXr
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x0, $x1, $x2, $x3, $x4
+
+ %4:gpr64common = COPY $x4
+ %3:gpr64common = COPY $x3
+ %2:gpr64common = COPY $x2
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %5:gpr64 = LDRXui %2, 0
+ %6:gpr64 = ADDSXrr killed %5, %0, implicit-def $nzcv
+ STRXui killed %6, %2, 0
+ %7:gpr64 = CSINCXr %1, %1, 3, implicit $nzcv
+ %8:gpr64 = LDRXui %3, 0
+ %9:gpr64 = ADDSXrr killed %7, killed %8, implicit-def $nzcv
+ STRXui killed %9, %3, 0
+ Bcc 3, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ %10:gpr64common = LDRXui %4, 0
+ %11:gpr64common = ADDXri killed %10, 1, 0
+ STRXui killed %11, %4, 0
+
+ bb.2:
+ RET_ReallyLR
+
+...
+---
+name: _Z18adc_combine_32bitsjjRjS_S_
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: gpr64common }
+ - { id: 3, class: gpr64common }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+ - { id: 8, class: gpr32 }
+ - { id: 9, class: gpr32 }
+ - { id: 10, class: gpr32common }
+ - { id: 11, class: gpr32common }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+ - { reg: '$x2', virtual-reg: '%2' }
+ - { reg: '$x3', virtual-reg: '%3' }
+ - { reg: '$x4', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: _Z18adc_combine_32bitsjjRjS_S_
+ ; CHECK: bb.0.entry:
+ ; CHECK: ADCSWr
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $w0, $w1, $x2, $x3, $x4
+
+ %4:gpr64common = COPY $x4
+ %3:gpr64common = COPY $x3
+ %2:gpr64common = COPY $x2
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %5:gpr32 = LDRWui %2, 0
+ %6:gpr32 = ADDSWrr killed %5, %0, implicit-def $nzcv
+ STRWui killed %6, %2, 0
+ %7:gpr32 = CSINCWr %1, %1, 3, implicit $nzcv
+ %8:gpr32 = LDRWui %3, 0
+ %9:gpr32 = ADDSWrr killed %7, killed %8, implicit-def $nzcv
+ STRWui killed %9, %3, 0
+ Bcc 3, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ %10:gpr32common = LDRWui %4, 0
+ %11:gpr32common = ADDWri killed %10, 1, 0
+ STRWui killed %11, %4, 0
+
+ bb.2:
+ RET_ReallyLR
+
+...
+---
+name: _Z23adc_combine_fail_64bitsmmRmS_S_
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: gpr64common }
+ - { id: 3, class: gpr64common }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: gpr64 }
+ - { id: 6, class: gpr64common }
+ - { id: 7, class: gpr64 }
+ - { id: 8, class: gpr64 }
+ - { id: 9, class: gpr64 }
+ - { id: 10, class: gpr64 }
+ - { id: 11, class: gpr64 }
+ - { id: 12, class: gpr64common }
+ - { id: 13, class: gpr64common }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+ - { reg: '$x2', virtual-reg: '%2' }
+ - { reg: '$x3', virtual-reg: '%3' }
+ - { reg: '$x4', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: _Z23adc_combine_fail_64bitsmmRmS_S_
+ ; CHECK: bb.0.entry:
+ ; CHECK-NOT: ADCSXr
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $x0, $x1, $x2, $x3, $x4
+
+ %4:gpr64common = COPY $x4
+ %3:gpr64common = COPY $x3
+ %2:gpr64common = COPY $x2
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %5:gpr64 = LDRXui %2, 0
+ %6:gpr64common = ADDSXrr killed %5, %0, implicit-def $nzcv
+ %7:gpr64 = CSINCXr %1, %1, 3, implicit $nzcv
+ %8:gpr64 = SUBSXri %6, 0, 0, implicit-def $nzcv
+ %9:gpr64 = CSINCXr %6, $xzr, 1, implicit $nzcv
+ STRXui killed %9, %2, 0
+ %10:gpr64 = LDRXui %3, 0
+ %11:gpr64 = ADDSXrr killed %7, killed %10, implicit-def $nzcv
+ STRXui killed %11, %3, 0
+ Bcc 3, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ %12:gpr64common = LDRXui %4, 0
+ %13:gpr64common = ADDXri killed %12, 1, 0
+ STRXui killed %13, %4, 0
+
+ bb.2:
+ RET_ReallyLR
+
+...
+---
+name: _Z23adc_combine_fail_32bitsjjRjS_S_
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: gpr64common }
+ - { id: 3, class: gpr64common }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: gpr32common }
+ - { id: 7, class: gpr32 }
+ - { id: 8, class: gpr32 }
+ - { id: 9, class: gpr32 }
+ - { id: 10, class: gpr32 }
+ - { id: 11, class: gpr32 }
+ - { id: 12, class: gpr32common }
+ - { id: 13, class: gpr32common }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+ - { reg: '$x2', virtual-reg: '%2' }
+ - { reg: '$x3', virtual-reg: '%3' }
+ - { reg: '$x4', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: _Z23adc_combine_fail_32bitsjjRjS_S_
+ ; CHECK: bb.0.entry:
+ ; CHECK-NOT: ADCSWr
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $w0, $w1, $x2, $x3, $x4
+
+ %4:gpr64common = COPY $x4
+ %3:gpr64common = COPY $x3
+ %2:gpr64common = COPY $x2
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %5:gpr32 = LDRWui %2, 0
+ %6:gpr32common = ADDSWrr killed %5, %0, implicit-def $nzcv
+ %7:gpr32 = CSINCWr %1, %1, 3, implicit $nzcv
+ %8:gpr32 = SUBSWri %6, 0, 0, implicit-def $nzcv
+ %9:gpr32 = CSINCWr %6, $wzr, 1, implicit $nzcv
+ STRWui killed %9, %2, 0
+ %10:gpr32 = LDRWui %3, 0
+ %11:gpr32 = ADDSWrr killed %7, killed %10, implicit-def $nzcv
+ STRWui killed %11, %3, 0
+ Bcc 3, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ %12:gpr32common = LDRWui %4, 0
+ %13:gpr32common = ADDWri killed %12, 1, 0
+ STRWui killed %13, %4, 0
+
+ bb.2:
+ RET_ReallyLR
+
+...
More information about the llvm-commits
mailing list