[llvm] b911b99 - [AArch64][GlobalISel] Don't reconvert to p0 in convertPtrAddToAdd().

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 3 11:50:41 PST 2020


Author: Amara Emerson
Date: 2020-02-03T11:50:22-08:00
New Revision: b911b99052e90b9a011bcdf1a702d85eb86973d8

URL: https://github.com/llvm/llvm-project/commit/b911b99052e90b9a011bcdf1a702d85eb86973d8
DIFF: https://github.com/llvm/llvm-project/commit/b911b99052e90b9a011bcdf1a702d85eb86973d8.diff

LOG: [AArch64][GlobalISel] Don't reconvert to p0 in convertPtrAddToAdd().

convertPtrAddToAdd improved overall code size and quality by a significant amount,
but at -O0 we generated some cross-class copies due to the fact that we emitted
G_PTRTOINT and G_INTTOPTR around the G_ADD. Unfortunately, at -O0 we don't run any
register coalescing, so these cross-class copies escaped as moves, and
we ended up regressing 3 benchmarks on CTMark (though still a winner overall).

This patch changes the lowering to instead directly emit the G_ADD into the
destination register, and then forcibly changes the destination LLT to s64 from p0.
This should be OK, as all uses of the register should now be selected and therefore
the LLT doesn't matter for the users. It does, however, matter for the importer
patterns, which will fail to select a G_ADD if there's a p0 LLT.

I'm not yet able to get rid of the G_PTRTOINT on the source, however. We can't
use the same trick of breaking the type system there, since that could break the
selection of the defining instruction. Thus, with -O0 we still end up with a
cross-class copy on the source.

Code size improvements on -O0:
Program                                         baseline      new         diff
 test-suite :: CTMark/Bullet/bullet.test        965520       949164      -1.7%
 test-suite...TMark/7zip/7zip-benchmark.test    1069456      1052600     -1.6%
 test-suite...ark/tramp3d-v4/tramp3d-v4.test    1213692      1199804     -1.1%
 test-suite...:: CTMark/sqlite3/sqlite3.test    421680       419736      -0.5%
 test-suite...-typeset/consumer-typeset.test    837076       833380      -0.4%
 test-suite :: CTMark/lencod/lencod.test        799712       796976      -0.3%
 test-suite...:: CTMark/ClamAV/clamscan.test    688264       686132      -0.3%
 test-suite :: CTMark/kimwitu++/kc.test         1002344      999648      -0.3%
 test-suite...Mark/mafft/pairlocalalign.test    422296       421768      -0.1%
 test-suite :: CTMark/SPASS/SPASS.test          656792       656532      -0.0%
 Geomean difference                                                      -0.6%

Differential Revision: https://reviews.llvm.org/D73910

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 790ea72dff4d..33d8a0817b0d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1408,9 +1408,7 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
 }
 
 /// This lowering tries to look for G_PTR_ADD instructions and then converts
-/// them to a standard G_ADD with a COPY on the source, and G_INTTOPTR on the
-/// result. This is ok for address space 0 on AArch64 as p0 can be treated as
-/// s64.
+/// them to a standard G_ADD with a COPY on the source.
 ///
 /// The motivation behind this is to expose the add semantics to the imported
 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
@@ -1422,7 +1420,6 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
   Register DstReg = I.getOperand(0).getReg();
   Register AddOp1Reg = I.getOperand(1).getReg();
-  Register AddOp2Reg = I.getOperand(2).getReg();
   const LLT PtrTy = MRI.getType(DstReg);
   if (PtrTy.getAddressSpace() != 0)
     return false;
@@ -1434,23 +1431,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
   MachineIRBuilder MIB(I);
   const LLT s64 = LLT::scalar(64);
   auto PtrToInt = MIB.buildPtrToInt(s64, AddOp1Reg);
-  auto Add = MIB.buildAdd(s64, PtrToInt, AddOp2Reg);
   // Set regbanks on the registers.
   MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
-  MRI.setRegBank(Add.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
-
-  // Now turn the %dst = G_PTR_ADD %base, off into:
-  // %dst = G_INTTOPTR %Add
-  I.setDesc(TII.get(TargetOpcode::G_INTTOPTR));
-  I.getOperand(1).setReg(Add.getReg(0));
-  I.RemoveOperand(2);
-
-  // We need to manually call select on these because new instructions don't
-  // get added to the selection queue.
-  if (!select(*Add)) {
-    LLVM_DEBUG(dbgs() << "Failed to select G_ADD in convertPtrAddToAdd");
-    return false;
-  }
+
+  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
+  // %dst(s64) = G_ADD %intbase, off
+  I.setDesc(TII.get(TargetOpcode::G_ADD));
+  MRI.setType(DstReg, s64);
+  I.getOperand(1).setReg(PtrToInt.getReg(0));
   if (!select(*PtrToInt)) {
     LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
     return false;

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
index 87777c88e54e..8b8d66541469 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -93,11 +93,10 @@ body:             |
     ; CHECK: liveins: $x0, $x1
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY [[ADDXrr]]
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
-    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[LDRXui]]
+    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]]
+    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]]
     ; CHECK: $x0 = COPY [[ADDXrr1]]
     ; CHECK: RET_ReallyLR implicit $x0
     %0:gpr(p0) = COPY $x0
@@ -387,13 +386,12 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY [[ADDXrr]]
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
     ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
     ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]]
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
-    ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXrr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+    ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
     ; CHECK: $x2 = COPY [[ADDXrr2]]
     ; CHECK: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -459,10 +457,9 @@ body:             |
     ; CHECK: liveins: $x0, $x1, $x2
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64 = ADDXrs [[COPY1]], [[COPY]], 3
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[ADDXrs]]
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
-    ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3
+    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load 8 from %ir.addr)
     ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]]
     ; CHECK: $x2 = COPY [[ADDXrr]]
     ; CHECK: RET_ReallyLR implicit $x2

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index e85f7f64f02c..e64b62699ec4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -106,8 +106,8 @@ registers:
 
 # CHECK:  body:
 # CHECK: %0:gpr64 = COPY $x0
-# CHECK: %5:gpr32 = MOVi32imm 10000
-# CHECK: %1:gpr64 = SUBREG_TO_REG 0, %5, %subreg.sub_32
+# CHECK: %4:gpr32 = MOVi32imm 10000
+# CHECK: %1:gpr64 = SUBREG_TO_REG 0, %4, %subreg.sub_32
 # CHECK: %{{[0-9]+}}:gpr64 = ADDXrr %0, %1
 body:             |
   bb.0:


        


More information about the llvm-commits mailing list