[llvm] 67bf3ac - [AArch64][GlobalISel] Don't contract cross-bank copies into truncating stores.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 20 16:36:28 PDT 2021
Author: Amara Emerson
Date: 2021-08-20T16:36:23-07:00
New Revision: 67bf3ac7446bd8ba16bb5ef82fa9f28a848616da
URL: https://github.com/llvm/llvm-project/commit/67bf3ac7446bd8ba16bb5ef82fa9f28a848616da
DIFF: https://github.com/llvm/llvm-project/commit/67bf3ac7446bd8ba16bb5ef82fa9f28a848616da.diff
LOG: [AArch64][GlobalISel] Don't contract cross-bank copies into truncating stores.
Truncating stores with GPR bank sources shouldn't be mutated into using FPR bank
sources, since those aren't supported.
Ideally this should be a selection failure in the tablegen patterns, but for now
avoid generating them.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index bce7f94a5043..15bdc81330f6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -104,7 +105,7 @@ class AArch64InstructionSelector : public InstructionSelector {
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
- bool contractCrossBankCopyIntoStore(MachineInstr &I,
+ bool contractCrossBankCopyIntoStore(GStore &I,
MachineRegisterInfo &MRI);
bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -1934,8 +1935,9 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
return true;
}
case TargetOpcode::G_STORE: {
- bool Changed = contractCrossBankCopyIntoStore(I, MRI);
- MachineOperand &SrcOp = I.getOperand(0);
+ auto &StoreMI = cast<GStore>(I);
+ bool Changed = contractCrossBankCopyIntoStore(StoreMI, MRI);
+ MachineOperand &SrcOp = StoreMI.getOperand(0);
if (MRI.getType(SrcOp.getReg()).isPointer()) {
// Allow matching with imported patterns for stores of pointers. Unlike
// G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
@@ -1946,6 +1948,28 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
Changed = true;
}
+#if 0
+ // Now look for truncating stores to the FPR bank. We don't support these,
+ // but since truncating store formation happens before RBS, we can only
+ // split them up again here. We don't want to assign truncstores to GPR only
+ // since that would have a perf impact due to extra moves.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
+ if (SrcTy.isScalar() &&
+ SrcTy.getSizeInBits() > StoreMI.getMemSizeInBits()) {
+ // Generate an explicit truncate and make this into a non-truncating
+ // store.
+ auto Trunc =
+ MIB.buildTrunc(LLT::scalar(StoreMI.getMemSizeInBits()), SrcReg);
+ MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
+ if (!select(*Trunc)) {
+ return false;
+ }
+ SrcOp.setReg(Trunc.getReg(0));
+ return true;
+ }
+ }
+#endif
return Changed;
}
case TargetOpcode::G_PTR_ADD:
@@ -2081,8 +2105,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
}
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
- MachineInstr &I, MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
+ GStore &StoreMI, MachineRegisterInfo &MRI) {
// If we're storing a scalar, it doesn't matter what register bank that
// scalar is on. All that matters is the size.
//
@@ -2097,11 +2120,11 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
// G_STORE %x:gpr(s32)
//
// And then continue the selection process normally.
- Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
+ Register DefDstReg = getSrcRegIgnoringCopies(StoreMI.getValueReg(), MRI);
if (!DefDstReg.isValid())
return false;
LLT DefDstTy = MRI.getType(DefDstReg);
- Register StoreSrcReg = I.getOperand(0).getReg();
+ Register StoreSrcReg = StoreMI.getValueReg();
LLT StoreSrcTy = MRI.getType(StoreSrcReg);
// If we get something strange like a physical register, then we shouldn't
@@ -2113,12 +2136,16 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
return false;
+ // Is this store a truncating one?
+ if (StoreSrcTy.getSizeInBits() != StoreMI.getMemSizeInBits())
+ return false;
+
if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
RBI.getRegBank(DefDstReg, MRI, TRI))
return false;
// We have a cross-bank copy, which is entering a store. Let's fold it.
- I.getOperand(0).setReg(DefDstReg);
+ StoreMI.getOperand(0).setReg(DefDstReg);
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
index 61a70dd78265..cb92b6a58ba5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
@@ -1,15 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
---- |
- define void @contract_s64_gpr(i64* %addr) { ret void }
- define void @contract_s32_gpr(i32* %addr) { ret void }
- define void @contract_s64_fpr(i64* %addr) { ret void }
- define void @contract_s32_fpr(i32* %addr) { ret void }
- define void @contract_s16_fpr(i16* %addr) { ret void }
- define void @contract_g_unmerge_values_first(i128* %addr) { ret void }
- define void @contract_g_unmerge_values_second(i128* %addr) { ret void }
-...
---
name: contract_s64_gpr
legalized: true
@@ -20,11 +11,11 @@ body: |
; CHECK-LABEL: name: contract_s64_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+ ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%2:fpr(s64) = COPY %1
- G_STORE %2:fpr(s64), %0 :: (store (s64) into %ir.addr)
+ G_STORE %2:fpr(s64), %0 :: (store (s64))
...
---
name: contract_s32_gpr
@@ -36,11 +27,11 @@ body: |
; CHECK-LABEL: name: contract_s32_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
+ ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32))
%0:gpr(p0) = COPY $x0
%1:gpr(s32) = COPY $w1
%2:fpr(s32) = COPY %1
- G_STORE %2:fpr(s32), %0 :: (store (s32) into %ir.addr)
+ G_STORE %2:fpr(s32), %0 :: (store (s32))
...
---
name: contract_s64_fpr
@@ -52,11 +43,11 @@ body: |
; CHECK-LABEL: name: contract_s64_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
- ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+ ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:fpr(s64) = COPY $d1
%2:gpr(s64) = COPY %1
- G_STORE %2:gpr(s64), %0 :: (store (s64) into %ir.addr)
+ G_STORE %2:gpr(s64), %0 :: (store (s64))
...
---
name: contract_s32_fpr
@@ -68,11 +59,11 @@ body: |
; CHECK-LABEL: name: contract_s32_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
- ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
+ ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32))
%0:gpr(p0) = COPY $x0
%1:fpr(s32) = COPY $s1
%2:gpr(s32) = COPY %1
- G_STORE %2:gpr(s32), %0 :: (store (s32) into %ir.addr)
+ G_STORE %2:gpr(s32), %0 :: (store (s32))
...
---
name: contract_s16_fpr
@@ -84,11 +75,11 @@ body: |
; CHECK-LABEL: name: contract_s16_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
- ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr)
+ ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16))
%0:gpr(p0) = COPY $x0
%1:fpr(s16) = COPY $h1
%2:gpr(s16) = COPY %1
- G_STORE %2:gpr(s16), %0 :: (store (s16) into %ir.addr)
+ G_STORE %2:gpr(s16), %0 :: (store (s16))
...
---
name: contract_g_unmerge_values_first
@@ -99,15 +90,16 @@ body: |
liveins: $x0, $x1
; CHECK-LABEL: name: contract_g_unmerge_values_first
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
- ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub
- ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+ ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+ ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
+ ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
- %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
+ %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
%4:gpr(s64) = COPY %2
%5:gpr(s64) = COPY %3
- G_STORE %4:gpr(s64), %0 :: (store (s64) into %ir.addr)
+ G_STORE %4:gpr(s64), %0 :: (store (s64))
...
---
name: contract_g_unmerge_values_second
@@ -118,12 +110,31 @@ body: |
liveins: $x0, $x1
; CHECK-LABEL: name: contract_g_unmerge_values_second
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
- ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1
- ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
+ ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+ ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
+ ; CHECK: STRDui [[CPYi64_]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
- %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
+ %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
%4:gpr(s64) = COPY %2
%5:gpr(s64) = COPY %3
- G_STORE %5:gpr(s64), %0 :: (store (s64) into %ir.addr)
+ G_STORE %5:gpr(s64), %0 :: (store (s64))
+...
+---
+name: contract_s16_truncstore
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $x0, $s1
+ ; CHECK-LABEL: name: contract_s16_truncstore
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+ ; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store (s16))
+ %0:gpr(p0) = COPY $x0
+ %1:fpr(s32) = COPY $s1
+ %2:gpr(s32) = COPY %1
+ G_STORE %2:gpr(s32), %0 :: (store (s16))
+...
More information about the llvm-commits
mailing list