[llvm] 67bf3ac - [AArch64][GlobalISel] Don't contract cross-bank copies into truncating stores.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 20 16:36:28 PDT 2021


Author: Amara Emerson
Date: 2021-08-20T16:36:23-07:00
New Revision: 67bf3ac7446bd8ba16bb5ef82fa9f28a848616da

URL: https://github.com/llvm/llvm-project/commit/67bf3ac7446bd8ba16bb5ef82fa9f28a848616da
DIFF: https://github.com/llvm/llvm-project/commit/67bf3ac7446bd8ba16bb5ef82fa9f28a848616da.diff

LOG: [AArch64][GlobalISel] Don't contract cross-bank copies into truncating stores.

Truncating stores with GPR bank sources shouldn't be mutated into using FPR bank
sources, since those aren't supported.

Ideally this should be a selection failure in the tablegen patterns, but for now
avoid generating them.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index bce7f94a5043..15bdc81330f6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -22,6 +22,7 @@
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -104,7 +105,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
 
   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
-  bool contractCrossBankCopyIntoStore(MachineInstr &I,
+  bool contractCrossBankCopyIntoStore(GStore &I,
                                       MachineRegisterInfo &MRI);
 
   bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -1934,8 +1935,9 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
     return true;
   }
   case TargetOpcode::G_STORE: {
-    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
-    MachineOperand &SrcOp = I.getOperand(0);
+    auto &StoreMI = cast<GStore>(I);
+    bool Changed = contractCrossBankCopyIntoStore(StoreMI, MRI);
+    MachineOperand &SrcOp = StoreMI.getOperand(0);
     if (MRI.getType(SrcOp.getReg()).isPointer()) {
       // Allow matching with imported patterns for stores of pointers. Unlike
       // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
@@ -1946,6 +1948,28 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
       RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
       Changed = true;
     }
+#if 0
+    // Now look for truncating stores to the FPR bank. We don't support these,
+    // but since truncating store formation happens before RBS, we can only
+    // split them up again here. We don't want to assign truncstores to GPR only
+    // since that would have a perf impact due to extra moves.
+    LLT SrcTy = MRI.getType(SrcReg);
+    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
+      if (SrcTy.isScalar() &&
+          SrcTy.getSizeInBits() > StoreMI.getMemSizeInBits()) {
+        // Generate an explicit truncate and make this into a non-truncating
+        // store.
+        auto Trunc =
+            MIB.buildTrunc(LLT::scalar(StoreMI.getMemSizeInBits()), SrcReg);
+        MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
+        if (!select(*Trunc)) {
+          return false;
+        }
+        SrcOp.setReg(Trunc.getReg(0));
+        return true;
+      }
+    }
+#endif
     return Changed;
   }
   case TargetOpcode::G_PTR_ADD:
@@ -2081,8 +2105,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
 }
 
 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
-    MachineInstr &I, MachineRegisterInfo &MRI) {
-  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
+    GStore &StoreMI, MachineRegisterInfo &MRI) {
   // If we're storing a scalar, it doesn't matter what register bank that
   // scalar is on. All that matters is the size.
   //
@@ -2097,11 +2120,11 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
   // G_STORE %x:gpr(s32)
   //
   // And then continue the selection process normally.
-  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
+  Register DefDstReg = getSrcRegIgnoringCopies(StoreMI.getValueReg(), MRI);
   if (!DefDstReg.isValid())
     return false;
   LLT DefDstTy = MRI.getType(DefDstReg);
-  Register StoreSrcReg = I.getOperand(0).getReg();
+  Register StoreSrcReg = StoreMI.getValueReg();
   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
 
   // If we get something strange like a physical register, then we shouldn't
@@ -2113,12 +2136,16 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
     return false;
 
+  // Is this store a truncating one?
+  if (StoreSrcTy.getSizeInBits() != StoreMI.getMemSizeInBits())
+    return false;
+
   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
       RBI.getRegBank(DefDstReg, MRI, TRI))
     return false;
 
   // We have a cross-bank copy, which is entering a store. Let's fold it.
-  I.getOperand(0).setReg(DefDstReg);
+  StoreMI.getOperand(0).setReg(DefDstReg);
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
index 61a70dd78265..cb92b6a58ba5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
@@ -1,15 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
 
---- |
-  define void @contract_s64_gpr(i64* %addr) { ret void }
-  define void @contract_s32_gpr(i32* %addr) { ret void }
-  define void @contract_s64_fpr(i64* %addr) { ret void }
-  define void @contract_s32_fpr(i32* %addr) { ret void }
-  define void @contract_s16_fpr(i16* %addr) { ret void }
-  define void @contract_g_unmerge_values_first(i128* %addr) { ret void }
-  define void @contract_g_unmerge_values_second(i128* %addr) { ret void }
-...
 ---
 name:            contract_s64_gpr
 legalized:       true
@@ -20,11 +11,11 @@ body:             |
     ; CHECK-LABEL: name: contract_s64_gpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+    ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64))
     %0:gpr(p0) = COPY $x0
     %1:gpr(s64) = COPY $x1
     %2:fpr(s64) = COPY %1
-    G_STORE  %2:fpr(s64), %0 :: (store (s64) into %ir.addr)
+    G_STORE  %2:fpr(s64), %0 :: (store (s64))
 ...
 ---
 name:            contract_s32_gpr
@@ -36,11 +27,11 @@ body:             |
     ; CHECK-LABEL: name: contract_s32_gpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
+    ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32))
     %0:gpr(p0) = COPY $x0
     %1:gpr(s32) = COPY $w1
     %2:fpr(s32) = COPY %1
-    G_STORE  %2:fpr(s32), %0 :: (store (s32) into %ir.addr)
+    G_STORE  %2:fpr(s32), %0 :: (store (s32))
 ...
 ---
 name:            contract_s64_fpr
@@ -52,11 +43,11 @@ body:             |
     ; CHECK-LABEL: name: contract_s64_fpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+    ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
     %0:gpr(p0) = COPY $x0
     %1:fpr(s64) = COPY $d1
     %2:gpr(s64) = COPY %1
-    G_STORE %2:gpr(s64), %0 :: (store (s64) into %ir.addr)
+    G_STORE %2:gpr(s64), %0 :: (store (s64))
 ...
 ---
 name:            contract_s32_fpr
@@ -68,11 +59,11 @@ body:             |
     ; CHECK-LABEL: name: contract_s32_fpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
+    ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32))
     %0:gpr(p0) = COPY $x0
     %1:fpr(s32) = COPY $s1
     %2:gpr(s32) = COPY %1
-    G_STORE %2:gpr(s32), %0 :: (store (s32) into %ir.addr)
+    G_STORE %2:gpr(s32), %0 :: (store (s32))
 ...
 ---
 name:            contract_s16_fpr
@@ -84,11 +75,11 @@ body:             |
     ; CHECK-LABEL: name: contract_s16_fpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
-    ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr)
+    ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16))
     %0:gpr(p0) = COPY $x0
     %1:fpr(s16) = COPY $h1
     %2:gpr(s16) = COPY %1
-    G_STORE  %2:gpr(s16), %0 :: (store (s16) into %ir.addr)
+    G_STORE  %2:gpr(s16), %0 :: (store (s16))
 ...
 ---
 name:            contract_g_unmerge_values_first
@@ -99,15 +90,16 @@ body:             |
     liveins: $x0, $x1
     ; CHECK-LABEL: name: contract_g_unmerge_values_first
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub
-    ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+    ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
+    ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
     %0:gpr(p0) = COPY $x0
-    %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
+    %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
     %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
     %4:gpr(s64) = COPY %2
     %5:gpr(s64) = COPY %3
-    G_STORE  %4:gpr(s64), %0 :: (store (s64) into %ir.addr)
+    G_STORE  %4:gpr(s64), %0 :: (store (s64))
 ...
 ---
 name:            contract_g_unmerge_values_second
@@ -118,12 +110,31 @@ body:             |
     liveins: $x0, $x1
     ; CHECK-LABEL: name: contract_g_unmerge_values_second
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1
-    ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+    ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
+    ; CHECK: STRDui [[CPYi64_]], [[COPY]], 0 :: (store (s64))
     %0:gpr(p0) = COPY $x0
-    %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
+    %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
     %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
     %4:gpr(s64) = COPY %2
     %5:gpr(s64) = COPY %3
-    G_STORE  %5:gpr(s64), %0 :: (store (s64) into %ir.addr)
+    G_STORE  %5:gpr(s64), %0 :: (store (s64))
+...
+---
+name:            contract_s16_truncstore
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $x0, $s1
+    ; CHECK-LABEL: name: contract_s16_truncstore
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+    ; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store (s16))
+    %0:gpr(p0) = COPY $x0
+    %1:fpr(s32) = COPY $s1
+    %2:gpr(s32) = COPY %1
+    G_STORE  %2:gpr(s32), %0 :: (store (s16))
+...


        


More information about the llvm-commits mailing list