[llvm] b911b99 - [AArch64][GlobalISel] Don't reconvert to p0 in convertPtrAddToAdd().
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 3 11:50:41 PST 2020
Author: Amara Emerson
Date: 2020-02-03T11:50:22-08:00
New Revision: b911b99052e90b9a011bcdf1a702d85eb86973d8
URL: https://github.com/llvm/llvm-project/commit/b911b99052e90b9a011bcdf1a702d85eb86973d8
DIFF: https://github.com/llvm/llvm-project/commit/b911b99052e90b9a011bcdf1a702d85eb86973d8.diff
LOG: [AArch64][GlobalISel] Don't reconvert to p0 in convertPtrAddToAdd().
convertPtrAddToAdd improved overall code size and quality by a significant amount,
but at -O0 we generated some cross-class copies because we emitted G_PTRTOINT
and G_INTTOPTR around the G_ADD. Unfortunately, at -O0 we don't run any
register coalescing, so these cross-class copies escape as moves, and we ended
up regressing 3 benchmarks on CTMark (though the change was still a winner overall).
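To illustrate with a sketch (register names and type annotations here are
illustrative, not actual compiler output), the old lowering turned a G_PTR_ADD
into:

    %int:gpr(s64) = G_PTRTOINT %base(p0)
    %sum:gpr(s64) = G_ADD %int, %off
    %dst:gpr(p0)  = G_INTTOPTR %sum

The p0 <-> s64 boundaries then select into copies between register classes
(e.g. gpr64 vs. gpr64common in the tests below), which -O0 never coalesces away.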
This patch changes the lowering to instead emit the G_ADD directly into the
destination register, and then forcibly change the destination's LLT from p0 to
s64. This should be OK, as all uses of the register should already be selected
by this point, so the LLT no longer matters to them. It does, however, matter
to the imported patterns, which will fail to select a G_ADD if the destination
has a p0 LLT.
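As a sketch of the new lowering (again with illustrative names), the same
G_PTR_ADD now becomes:

    %int:gpr(s64) = G_PTRTOINT %base(p0)
    %dst:gpr(s64) = G_ADD %int, %off    ; LLT of %dst force-changed from p0 to s64

so the G_ADD writes the original destination register directly, and the
trailing G_INTTOPTR (and the copy it produced) disappears.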
However, I'm not yet able to get rid of the G_PTRTOINT on the source. We can't
use the same type-system-breaking trick there, since retyping the source could
break the selection of its defining instruction. Thus at -O0 we still end up
with a cross-class copy on the source.
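For example (hypothetical MIR, only to show the hazard), if the base pointer is
defined by a load:

    %base:gpr(p0) = G_LOAD %addr(p0) :: (load 8)
    %int:gpr(s64) = G_PTRTOINT %base(p0)

then retyping %base to s64 would change the type its defining G_LOAD presents
to the imported patterns when that load is selected, which could make the
selection fail. So the source-side G_PTRTOINT stays for now.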
Code size improvements at -O0:
Program                                        baseline    new       diff
test-suite :: CTMark/Bullet/bullet.test         965520     949164    -1.7%
test-suite...TMark/7zip/7zip-benchmark.test    1069456    1052600    -1.6%
test-suite...ark/tramp3d-v4/tramp3d-v4.test    1213692    1199804    -1.1%
test-suite...:: CTMark/sqlite3/sqlite3.test     421680     419736    -0.5%
test-suite...-typeset/consumer-typeset.test     837076     833380    -0.4%
test-suite :: CTMark/lencod/lencod.test         799712     796976    -0.3%
test-suite...:: CTMark/ClamAV/clamscan.test     688264     686132    -0.3%
test-suite :: CTMark/kimwitu++/kc.test         1002344     999648    -0.3%
test-suite...Mark/mafft/pairlocalalign.test     422296     421768    -0.1%
test-suite :: CTMark/SPASS/SPASS.test           656792     656532    -0.0%
Geomean difference                                                   -0.6%
Differential Revision: https://reviews.llvm.org/D73910
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
llvm/test/CodeGen/AArch64/GlobalISel/select.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 790ea72dff4d..33d8a0817b0d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1408,9 +1408,7 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
}
/// This lowering tries to look for G_PTR_ADD instructions and then converts
-/// them to a standard G_ADD with a COPY on the source, and G_INTTOPTR on the
-/// result. This is ok for address space 0 on AArch64 as p0 can be treated as
-/// s64.
+/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
@@ -1422,7 +1420,6 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
Register DstReg = I.getOperand(0).getReg();
Register AddOp1Reg = I.getOperand(1).getReg();
- Register AddOp2Reg = I.getOperand(2).getReg();
const LLT PtrTy = MRI.getType(DstReg);
if (PtrTy.getAddressSpace() != 0)
return false;
@@ -1434,23 +1431,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
MachineIRBuilder MIB(I);
const LLT s64 = LLT::scalar(64);
auto PtrToInt = MIB.buildPtrToInt(s64, AddOp1Reg);
- auto Add = MIB.buildAdd(s64, PtrToInt, AddOp2Reg);
// Set regbanks on the registers.
MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
- MRI.setRegBank(Add.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
-
- // Now turn the %dst = G_PTR_ADD %base, off into:
- // %dst = G_INTTOPTR %Add
- I.setDesc(TII.get(TargetOpcode::G_INTTOPTR));
- I.getOperand(1).setReg(Add.getReg(0));
- I.RemoveOperand(2);
-
- // We need to manually call select on these because new instructions don't
- // get added to the selection queue.
- if (!select(*Add)) {
- LLVM_DEBUG(dbgs() << "Failed to select G_ADD in convertPtrAddToAdd");
- return false;
- }
+
+ // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
+ // %dst(s64) = G_ADD %intbase, off
+ I.setDesc(TII.get(TargetOpcode::G_ADD));
+ MRI.setType(DstReg, s64);
+ I.getOperand(1).setReg(PtrToInt.getReg(0));
if (!select(*PtrToInt)) {
LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
return false;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
index 87777c88e54e..8b8d66541469 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -93,11 +93,10 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]]
- ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY [[ADDXrr]]
- ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
- ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
- ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[LDRXui]]
+ ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]]
+ ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+ ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]]
; CHECK: $x0 = COPY [[ADDXrr1]]
; CHECK: RET_ReallyLR implicit $x0
%0:gpr(p0) = COPY $x0
@@ -387,13 +386,12 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[UBFMXri]]
- ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY [[ADDXrr]]
- ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
+ ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+ ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]]
- ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
- ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXrr1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+ ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
; CHECK: $x2 = COPY [[ADDXrr2]]
; CHECK: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
@@ -459,10 +457,9 @@ body: |
; CHECK: liveins: $x0, $x1, $x2
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64 = ADDXrs [[COPY1]], [[COPY]], 3
- ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[ADDXrs]]
- ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
- ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
+ ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3
+ ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load 8 from %ir.addr)
+ ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load 8 from %ir.addr)
; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]]
; CHECK: $x2 = COPY [[ADDXrr]]
; CHECK: RET_ReallyLR implicit $x2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index e85f7f64f02c..e64b62699ec4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -106,8 +106,8 @@ registers:
# CHECK: body:
# CHECK: %0:gpr64 = COPY $x0
-# CHECK: %5:gpr32 = MOVi32imm 10000
-# CHECK: %1:gpr64 = SUBREG_TO_REG 0, %5, %subreg.sub_32
+# CHECK: %4:gpr32 = MOVi32imm 10000
+# CHECK: %1:gpr64 = SUBREG_TO_REG 0, %4, %subreg.sub_32
# CHECK: %{{[0-9]+}}:gpr64 = ADDXrr %0, %1
body: |
bb.0: