[llvm] c42053f - [AArch64][GlobalISel] Select arith extended add/sub in manual selection code
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 11 09:26:35 PST 2020
Author: Jessica Paquette
Date: 2020-11-11T09:26:03-08:00
New Revision: c42053f79b6c25154345b1c10c7b7c743e480698
URL: https://github.com/llvm/llvm-project/commit/c42053f79b6c25154345b1c10c7b7c743e480698
DIFF: https://github.com/llvm/llvm-project/commit/c42053f79b6c25154345b1c10c7b7c743e480698.diff
LOG: [AArch64][GlobalISel] Select arith extended add/sub in manual selection code
The manual selection code for add/sub was not checking if it was possible to
fold in shifts + extends (the *rx opcode variants).
As a result, we could never select things like
```
cmp x1, w0, uxtw #2
```
because we don't import any patterns for compares.
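As a rough illustration (hypothetical code, not taken from the commit or its tests), the fold turns an explicit extend + shift feeding a compare into a single extended-register compare:
```
// Hypothetical before: the zero-extend and shift are materialized
// separately (w8 is just an arbitrary scratch register).
mov  w8, w0              // writing w8 implicitly zero-extends into x8
lsl  x8, x8, #2
cmp  x1, x8

// Hypothetical after: the extend + shift fold into the compare's
// extended-register operand (i.e. SUBSXrx with XZR as the destination).
cmp  x1, w0, uxtw #2
```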
This adds support for the arithmetic extended register forms and updates tests
for instructions selected using `emitADD`, `emitADDS`, and `emitSUBS`.
This is a 0.1% geomean code size improvement on SPECINT2000 at -Os.
Differential Revision: https://reviews.llvm.org/D91207
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index f59eb9cbd824..6f7e48e579be 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -188,7 +188,8 @@ class AArch64InstructionSelector : public InstructionSelector {
/// {{AArch64::ADDXri, AArch64::ADDWri},
/// {AArch64::ADDXrs, AArch64::ADDWrs},
/// {AArch64::ADDXrr, AArch64::ADDWrr},
- /// {AArch64::SUBXri, AArch64::SUBWri}}};
+ /// {AArch64::SUBXri, AArch64::SUBWri},
+ /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
/// \endcode
///
/// Each row in the table corresponds to a different addressing mode. Each
@@ -199,6 +200,7 @@ class AArch64InstructionSelector : public InstructionSelector {
/// - Row 1: The rs opcode variants
/// - Row 2: The rr opcode variants
/// - Row 3: The ri opcode variants for negative immediates
+ /// - Row 4: The rx opcode variants
///
/// \attention Columns must be structured as follows:
/// - Column 0: The 64-bit opcode variants
@@ -208,7 +210,7 @@ class AArch64InstructionSelector : public InstructionSelector {
/// \p LHS is the left-hand operand of the binop to emit.
/// \p RHS is the right-hand operand of the binop to emit.
MachineInstr *emitAddSub(
- const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
@@ -3821,7 +3823,7 @@ MachineInstr *AArch64InstructionSelector::emitInstr(
}
MachineInstr *AArch64InstructionSelector::emitAddSub(
- const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -3842,6 +3844,11 @@ MachineInstr *AArch64InstructionSelector::emitAddSub(
return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
MIRBuilder, Fns);
+ // INSTRrx form.
+ if (auto Fns = selectArithExtendedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
// INSTRrs form.
if (auto Fns = selectShiftedRegister(RHS))
return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
@@ -3854,11 +3861,12 @@ MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 4> OpcTable{
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
{{AArch64::ADDXri, AArch64::ADDWri},
{AArch64::ADDXrs, AArch64::ADDWrs},
{AArch64::ADDXrr, AArch64::ADDWrr},
- {AArch64::SUBXri, AArch64::SUBWri}}};
+ {AArch64::SUBXri, AArch64::SUBWri},
+ {AArch64::ADDXrx, AArch64::ADDWrx}}};
return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}
@@ -3866,11 +3874,12 @@ MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 4> OpcTable{
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
{{AArch64::ADDSXri, AArch64::ADDSWri},
{AArch64::ADDSXrs, AArch64::ADDSWrs},
{AArch64::ADDSXrr, AArch64::ADDSWrr},
- {AArch64::SUBSXri, AArch64::SUBSWri}}};
+ {AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
@@ -3878,11 +3887,12 @@ MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 4> OpcTable{
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
{{AArch64::SUBSXri, AArch64::SUBSWri},
{AArch64::SUBSXrs, AArch64::SUBSWrs},
{AArch64::SUBSXrr, AArch64::SUBSWrr},
- {AArch64::ADDSXri, AArch64::ADDSWri}}};
+ {AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
index 4d034b917952..d339a879de3e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
@@ -603,3 +603,36 @@ body: |
%cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
+
+...
+---
+name: cmn_arith_extended_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $x0, $x1
+ ; We should be able to fold away the extend + shift and select ADDSXrx.
+
+ ; CHECK-LABEL: name: cmn_arith_extended_shl
+ ; CHECK: liveins: $w0, $x0, $x1
+ ; CHECK: %reg0:gpr64sp = COPY $x0
+ ; CHECK: %reg1:gpr32 = COPY $w0
+ ; CHECK: $xzr = ADDSXrx %reg0, %reg1, 50, implicit-def $nzcv
+ ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+ ; CHECK: $w0 = COPY %cmp
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s64) = COPY $x0
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %sub:gpr(s64) = G_SUB %zero, %reg0
+
+ %reg1:gpr(s32) = COPY $w0
+ %ext:gpr(s64) = G_SEXT %reg1(s32)
+ %cst:gpr(s64) = G_CONSTANT i64 2
+ %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %sub(s64), %shift
+ $w0 = COPY %cmp(s32)
+ RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
index 6f4ee4f50c21..048b60ec76fd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -182,3 +182,91 @@ body: |
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
+...
+---
+name: cmp_arith_extended_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: cmp_arith_extended_s64
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: %reg0:gpr32 = COPY $w0
+ ; CHECK: %reg1:gpr64sp = COPY $x1
+ ; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
+ ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+ ; CHECK: $w0 = COPY %cmp
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s32) = COPY $w0
+ %reg1:gpr(s64) = COPY $x1
+ %ext:gpr(s64) = G_ZEXT %reg0(s32)
+ %cst:gpr(s64) = G_CONSTANT i64 2
+ %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+ %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
+ $w0 = COPY %cmp(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmp_arith_extended_s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $h0
+
+ ; CHECK-LABEL: name: cmp_arith_extended_s32
+ ; CHECK: liveins: $w0, $w1, $h0
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
+ ; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK: %reg1:gpr32sp = COPY $w1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
+ ; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
+ ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+ ; CHECK: $w0 = COPY %cmp
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s16) = COPY $h0
+ %reg1:gpr(s32) = COPY $w1
+ %ext:gpr(s32) = G_ZEXT %reg0(s16)
+ %cst:gpr(s32) = G_CONSTANT i32 2
+ %shift:gpr(s32) = G_SHL %ext, %cst(s32)
+ %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s32), %shift
+ $w0 = COPY %cmp(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmp_arith_extended_shl_too_large
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $x1
+
+ ; The constant on the G_SHL is > 4, so we won't select SUBSXrx.
+
+ ; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: %reg0:gpr32 = COPY $w0
+ ; CHECK: %reg1:gpr64 = COPY $x1
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg0, %subreg.sub_32
+ ; CHECK: %ext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+ ; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
+ ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+ ; CHECK: $w0 = COPY %cmp
+ ; CHECK: RET_ReallyLR implicit $w0
+ %reg0:gpr(s32) = COPY $w0
+ %reg1:gpr(s64) = COPY $x1
+ %ext:gpr(s64) = G_ZEXT %reg0(s32)
+ %cst:gpr(s64) = G_CONSTANT i64 5
+ %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+ %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
+ $w0 = COPY %cmp(s32)
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
index e360df71fb1b..79ed8f5ff9da 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
@@ -89,3 +89,24 @@ body: |
%2:gpr(p0) = G_PTR_ADD %0, %1(s64)
$x0 = COPY %2(p0)
...
+---
+name: ptr_add_arith_extended
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: ptr_add_arith_extended
+ ; CHECK: %reg0:gpr32 = COPY $w0
+ ; CHECK: %ptr:gpr64 = COPY $x1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %ptr
+ ; CHECK: %ptr_add:gpr64sp = ADDXrx [[COPY]], %reg0, 18
+ ; CHECK: $x0 = COPY %ptr_add
+ %reg0:gpr(s32) = COPY $w0
+ %ptr:gpr(p0) = COPY $x1
+ %ext:gpr(s64) = G_ZEXT %reg0(s32)
+ %cst:gpr(s64) = G_CONSTANT i64 2
+ %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+ %ptr_add:gpr(p0) = G_PTR_ADD %ptr, %shift(s64)
+ $x0 = COPY %ptr_add(p0)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
index 6280a5e1de00..33252e6e62e5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
@@ -136,3 +136,31 @@ body: |
%add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
$w0 = COPY %add(s32)
RET_ReallyLR implicit $w0
+
+...
+---
+name: uaddo_arith_extended
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $x0
+ ; Check that we get ADDSXrx.
+ ; CHECK-LABEL: name: uaddo_arith_extended
+ ; CHECK: liveins: $w0, $x0
+ ; CHECK: %reg0:gpr64sp = COPY $x0
+ ; CHECK: %reg1:gpr32 = COPY $w0
+ ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
+ ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+ ; CHECK: $x0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $x0
+ %reg0:gpr(s64) = COPY $x0
+ %reg1:gpr(s32) = COPY $w0
+ %ext:gpr(s64) = G_ZEXT %reg1(s32)
+ %cst:gpr(s64) = G_CONSTANT i64 2
+ %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+ %add:gpr(s64), %flags:gpr(s1) = G_UADDO %reg0, %shift
+ $x0 = COPY %add(s64)
+ RET_ReallyLR implicit $x0