[llvm] 04fb9b7 - [AArch64][GlobalISel] Fix incorrect handling of fp truncating stores.

Tue Aug 24 16:07:06 PDT 2021

Author: Amara Emerson
Date: 2021-08-24T16:07:00-07:00
New Revision: 04fb9b729a53a5f513328159590d86d28336a6da

URL: https://github.com/llvm/llvm-project/commit/04fb9b729a53a5f513328159590d86d28336a6da
DIFF: https://github.com/llvm/llvm-project/commit/04fb9b729a53a5f513328159590d86d28336a6da.diff

LOG: [AArch64][GlobalISel] Fix incorrect handling of fp truncating stores.

When the tablegen patterns fail to select a truncating scalar FPR store,
our manual selection code also failed to handle it silently, trying to
generate an invalid copy. Fix this by adding support in the manual code
to generate a proper subreg copy before selecting a non-truncating store.

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 222cd216b911..6b22c3329c7e 100644

--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2723,8 +2723,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   case TargetOpcode::G_ZEXTLOAD:
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE: {
+    GLoadStore &LdSt = cast<GLoadStore>(I);
     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
-    LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+    LLT PtrTy = MRI.getType(LdSt.getPointerReg());
 
     if (PtrTy != LLT::pointer(0, 64)) {
       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
@@ -2732,20 +2733,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       return false;
     }
 
-    auto &MemOp = **I.memoperands_begin();
-    uint64_t MemSizeInBytes = MemOp.getSize();
-    unsigned MemSizeInBits = MemSizeInBytes * 8;
-    AtomicOrdering Order = MemOp.getSuccessOrdering();
+    uint64_t MemSizeInBytes = LdSt.getMemSize();
+    unsigned MemSizeInBits = LdSt.getMemSizeInBits();
+    AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
 
     // Need special instructions for atomics that affect ordering.
     if (Order != AtomicOrdering::NotAtomic &&
         Order != AtomicOrdering::Unordered &&
         Order != AtomicOrdering::Monotonic) {
-      assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
+      assert(!isa<GZExtLoad>(LdSt));
       if (MemSizeInBytes > 64)
         return false;
 
-      if (I.getOpcode() == TargetOpcode::G_LOAD) {
+      if (isa<GLoad>(LdSt)) {
         static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
                                      AArch64::LDARW, AArch64::LDARX};
         I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
@@ -2759,7 +2759,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }
 
 #ifndef NDEBUG
-    const Register PtrReg = I.getOperand(1).getReg();
+    const Register PtrReg = LdSt.getPointerReg();
     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
     // Sanity-check the pointer register.
     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
@@ -2768,13 +2768,31 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
            "Load/Store pointer operand isn't a pointer");
 #endif
 
-    const Register ValReg = I.getOperand(0).getReg();
+    const Register ValReg = LdSt.getReg(0);
+    const LLT ValTy = MRI.getType(ValReg);
     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
 
+    // The code below doesn't support truncating stores, so we need to split it
+    // again.
+    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+      unsigned SubReg;
+      LLT MemTy = LdSt.getMMO().getMemoryType();
+      auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+      if (!getSubRegForClass(RC, TRI, SubReg))
+        return false;
+
+      // Generate a subreg copy.
+      auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
+                      .addReg(ValReg, 0, SubReg)
+                      .getReg(0);
+      RBI.constrainGenericRegister(Copy, *RC, MRI);
+      LdSt.getOperand(0).setReg(Copy);
+    }
+
     // Helper lambda for partially selecting I. Either returns the original
     // instruction with an updated opcode, or a new instruction.
     auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
-      bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+      bool IsStore = isa<GStore>(I);
       const unsigned NewOpc =
           selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
       if (NewOpc == I.getOpcode())
@@ -2791,7 +2809,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
 
       // Folded something. Create a new instruction and return it.
       auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
-      IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+      Register CurValReg = I.getOperand(0).getReg();
+      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
       NewInst.cloneMemRefs(I);
       for (auto &Fn : *AddrModeFns)
         Fn(NewInst);

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir
new file mode 100644
index 000000000000..29fc8b3f94fa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir
@@ -0,0 +1,115 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  define void @truncating_f32(double %x) {
+    %alloca = alloca i32, align 4
+    %bitcast = bitcast double %x to i64
+    %trunc = trunc i64 %bitcast to i32
+    store i32 %trunc, i32* %alloca, align 4
+    ret void
+  }
+
+  define void @truncating_f16(double %x) {
+    %alloca = alloca i16, align 2
+    %bitcast = bitcast double %x to i64
+    %trunc = trunc i64 %bitcast to i16
+    store i16 %trunc, i16* %alloca, align 2
+    ret void
+  }
+
+  define void @truncating_f8(double %x) {
+    %alloca = alloca i8, align 1
+    %bitcast = bitcast double %x to i64
+    %trunc = trunc i64 %bitcast to i8
+    store i8 %trunc, i8* %alloca, align 1
+    ret void
+  }
+
+...
+---
+name:            truncating_f32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+frameInfo:
+  maxAlignment:    4
+stack:
+  - { id: 0, name: alloca, size: 4, alignment: 4 }
+machineFunctionInfo: {}
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: $d0
+
+    ; CHECK-LABEL: name: truncating_f32
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
+    ; CHECK: STRSui [[COPY1]], %stack.0.alloca, 0 :: (store (s32) into %ir.alloca)
+    ; CHECK: RET_ReallyLR
+    %0:fpr(s64) = COPY $d0
+    %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
+    G_STORE %0(s64), %1(p0) :: (store (s32) into %ir.alloca)
+    RET_ReallyLR
+
+...
+---
+name:            truncating_f16
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+frameInfo:
+  maxAlignment:    2
+stack:
+  - { id: 0, name: alloca, size: 2, alignment: 2 }
+machineFunctionInfo: {}
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: $d0
+
+    ; CHECK-LABEL: name: truncating_f16
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub
+    ; CHECK: STRHui [[COPY1]], %stack.0.alloca, 0 :: (store (s16) into %ir.alloca)
+    ; CHECK: RET_ReallyLR
+    %0:fpr(s64) = COPY $d0
+    %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
+    G_STORE %0(s64), %1(p0) :: (store (s16) into %ir.alloca)
+    RET_ReallyLR
+
+...
+---
+name:            truncating_f8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+frameInfo:
+  maxAlignment:    1
+stack:
+  - { id: 0, name: alloca, size: 1, alignment: 1 }
+machineFunctionInfo: {}
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: $d0
+
+    ; CHECK-LABEL: name: truncating_f8
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr8 = COPY [[COPY]].bsub
+    ; CHECK: STRBui [[COPY1]], %stack.0.alloca, 0 :: (store (s8) into %ir.alloca)
+    ; CHECK: RET_ReallyLR
+    %0:fpr(s64) = COPY $d0
+    %1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
+    G_STORE %0(s64), %1(p0) :: (store (s8) into %ir.alloca)
+    RET_ReallyLR
+
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir
index e0983dbfdd14..9d044d18b8c2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir
@@ -278,13 +278,13 @@ body:             |
     ; CHECK-LABEL: name: test_rule96_id2146_at_idx8070
     ; CHECK: liveins: $x0
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s1))
+    ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
     ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0
+    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7
     ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]]
     %2:gpr(p0) = COPY $x0
-    %0:fpr(s1) = G_LOAD %2(p0) :: (load (s1))
-    %1:gpr(s32) = G_ZEXT %0(s1)
+    %0:fpr(s8) = G_LOAD %2(p0) :: (load (s8))
+    %1:gpr(s32) = G_ZEXT %0(s8)
     $noreg = PATCHABLE_RET %1(s32)
 
 ...