[llvm] adf7973 - [ARM] Describe defs/uses of VLLDM and VLSTM

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 23 08:29:27 PDT 2020


Author: Momchil Velikov
Date: 2020-06-23T16:04:23+01:00
New Revision: adf7973fd35d899a1500d2f379b4e6874b4de20f

URL: https://github.com/llvm/llvm-project/commit/adf7973fd35d899a1500d2f379b4e6874b4de20f
DIFF: https://github.com/llvm/llvm-project/commit/adf7973fd35d899a1500d2f379b4e6874b4de20f.diff

LOG: [ARM] Describe defs/uses of VLLDM and VLSTM

The VLLDM and VLSTM instructions are incompletely specified.  They
(potentially) write (or read, respectively) registers Q0-Q7, VPR, and
FPSCR, but the compiler is unaware of it.

In the new test case `cmse-vlldm-no-reorder.ll` case the compiler
missed an anti-dependency and reordered a `VLLDM` ahead of the
instruction, which stashed the return value from the non-secure call,
effectively clobbering said value.

This test case does not fail with upstream LLVM, because of scheduling
differences and I couldn't find a test case for the VLSTM either.

Differential Revision: https://reviews.llvm.org/D81586

Added: 
    llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll
    llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir

Modified: 
    llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
    llvm/lib/Target/ARM/ARMInstrVFP.td
    llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 49056d783028..48622aae3cb4 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -85,12 +85,15 @@ namespace {
                                           const BitVector &ClearRegs);
     void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                             const LivePhysRegs &LiveRegs,
                              SmallVectorImpl<unsigned> &AvailableRegs);
     void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                               const LivePhysRegs &LiveRegs,
                                SmallVectorImpl<unsigned> &ScratchRegs);
     void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MBBI, DebugLoc &DL);
+                                MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                                const LivePhysRegs &LiveRegs);
     void CMSERestoreFPRegs(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                            SmallVectorImpl<unsigned> &AvailableRegs);
@@ -1202,17 +1205,17 @@ ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
 
 void ARMExpandPseudo::CMSESaveClearFPRegs(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
-    SmallVectorImpl<unsigned> &ScratchRegs) {
+    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
   if (STI->hasV8_1MMainlineOps())
-    CMSESaveClearFPRegsV81(MBB, MBBI, DL);
+    CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
   else
-    CMSESaveClearFPRegsV8(MBB, MBBI, DL, ScratchRegs);
+    CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
 }
 
 // Save and clear FP registers if present
 void ARMExpandPseudo::CMSESaveClearFPRegsV8(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
-    SmallVectorImpl<unsigned> &ScratchRegs) {
+    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
   if (!STI->hasFPRegs())
     return;
 
@@ -1271,7 +1274,11 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
   // Lazy store all fp registers to the stack
   MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
                                   .addReg(ARM::SP)
-          .add(predOps(ARMCC::AL));
+                                  .add(predOps(ARMCC::AL));
+  for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+                 ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+    VLSTM.addReg(R, RegState::Implicit |
+                        (LiveRegs.contains(R) ? 0 : RegState::Undef));
 
   // Restore all arguments
   for (const auto &Regs : ClearedFPRegs) {
@@ -1343,7 +1350,8 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
 
 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MBBI,
-                                             DebugLoc &DL) {
+                                             DebugLoc &DL,
+                                             const LivePhysRegs &LiveRegs) {
   BitVector ClearRegs(32, true);
   bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
 
@@ -1358,9 +1366,13 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
         .add(predOps(ARMCC::AL));
 
     // Lazy store all FP registers to the stack
-    BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
-        .addReg(ARM::SP)
-        .add(predOps(ARMCC::AL));
+    MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+                                    .addReg(ARM::SP)
+                                    .add(predOps(ARMCC::AL));
+    for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+                   ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+      VLSTM.addReg(R, RegState::Implicit |
+                          (LiveRegs.contains(R) ? 0 : RegState::Undef));
   } else {
     // Push all the callee-saved registers (s16-s31).
     MachineInstrBuilder VPUSH =
@@ -1754,13 +1766,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
 static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
                                 MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI, int JumpReg,
-                                bool Thumb1Only) {
+                                const LivePhysRegs &LiveRegs, bool Thumb1Only) {
   const DebugLoc &DL = MBBI->getDebugLoc();
   if (Thumb1Only) { // push Lo and Hi regs separately
     MachineInstrBuilder PushMIB =
         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
-    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg)
-      PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+      PushMIB.addReg(
+          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+    }
 
     // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
     // regs that we just saved and push the low regs again, taking care to
@@ -1773,7 +1787,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
       if (JumpReg == LoReg)
         continue;
       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
-          .addReg(HiReg, RegState::Undef)
+          .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
           .add(predOps(ARMCC::AL));
       --HiReg;
     }
@@ -1791,19 +1805,21 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
     if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
       int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
-          .addReg(ARM::R8)
+          .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
           .add(predOps(ARMCC::AL));
       BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
           .add(predOps(ARMCC::AL))
-          .addReg(LoReg);
+          .addReg(LoReg, RegState::Kill);
     }
   } else { // push Lo and Hi registers with a single instruction
     MachineInstrBuilder PushMIB =
         BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
             .addReg(ARM::SP)
             .add(predOps(ARMCC::AL));
-    for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
-      PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+    for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
+      PushMIB.addReg(
+          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+    }
   }
 }
 
@@ -1934,7 +1950,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     case ARM::tBLXNS_CALL: {
       DebugLoc DL = MBBI->getDebugLoc();
       unsigned JumpReg = MBBI->getOperand(0).getReg();
-      CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg,
+
+      // Figure out which registers are live at the point immediately before the
+      // call. When we indiscriminately push a set of registers, the live
+      // registers are added as ordinary use operands, whereas dead registers
+      // are "undef".
+      LivePhysRegs LiveRegs(*TRI);
+      LiveRegs.addLiveOuts(MBB);
+      for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
+        LiveRegs.stepBackward(MI);
+      LiveRegs.stepBackward(*MBBI);
+
+      CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
                           AFI->isThumb1OnlyFunction());
 
       SmallVector<unsigned, 16> ClearRegs;
@@ -1971,7 +1998,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
             .add(predOps(ARMCC::AL));
       }
 
-      CMSESaveClearFPRegs(MBB, MBBI, DL,
+      CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
                           ClearRegs); // save+clear FP regs with ClearRegs
       CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
 

diff  --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 5611ddb57541..c7d12b3164c2 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -287,7 +287,6 @@ def : MnemonicAlias<"vstm", "vstmia">;
 //===----------------------------------------------------------------------===//
 //  Lazy load / store multiple Instructions
 //
-let mayLoad = 1 in
 def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
                   NoItinerary, "vlldm${p}\t$Rn", "", []>,
             Requires<[HasV8MMainline, Has8MSecExt]> {
@@ -298,9 +297,9 @@ def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
     let Inst{15-12} = 0;
     let Inst{7-0}   = 0;
     let mayLoad     = 1;
+    let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
 }
 
-let mayStore = 1 in
 def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
                   NoItinerary, "vlstm${p}\t$Rn", "", []>,
             Requires<[HasV8MMainline, Has8MSecExt]> {

diff  --git a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll
new file mode 100644
index 000000000000..dc986c4b30a0
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard %s -o - | \
+; RUN:   FileCheck %s
+
+ at g = hidden local_unnamed_addr global float (...)* null, align 4
+ at a = hidden local_unnamed_addr global float 0.000000e+00, align 4
+
+define hidden void @f() local_unnamed_addr #0 {
+entry:
+  %0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
+  %call = tail call nnan ninf nsz float %0() #1
+  store float %call, float* @a, align 4
+  ret void
+}
+
+; CHECK: blxns r{{[0-9]+}}
+; CHECK: vmov  r[[T:[0-9]+]], s0
+; CHECK: vlldm sp
+; CHECK: vmov  s0, r[[T]]
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "cmse_nonsecure_call" }

diff  --git a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir
new file mode 100644
index 000000000000..571ec0cec86c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir
@@ -0,0 +1,112 @@
+# RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard --run-pass=arm-pseudo %s -o - | \
+# RUN: FileCheck %s
+--- |
+  ; ModuleID = 'cmse-vlldm-no-reorder.ll'
+  source_filename = "cmse-vlldm-no-reorder.ll"
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8m.main"
+  
+  @g = hidden local_unnamed_addr global float (...)* null, align 4
+  @a = hidden local_unnamed_addr global float 0.000000e+00, align 4
+  
+  ; Function Attrs: nounwind
+  define hidden void @f() local_unnamed_addr #0 {
+  entry:
+    %0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
+    %call = tail call nnan ninf nsz float %0() #2
+    store float %call, float* @a, align 4
+    ret void
+  }
+  
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+  
+  attributes #0 = { nounwind "target-cpu"="cortex-m33" }
+  attributes #1 = { nounwind }
+  attributes #2 = { nounwind "cmse_nonsecure_call" }
+
+...
+---
+name:            f
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $r7, $lr
+  
+    $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    renamable $r0 = t2MOVi32imm @g
+    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from `float ()** bitcast (float (...)** @g to float ()**)`)
+    tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
+    renamable $r0 = t2MOVi32imm @a
+    VSTRS killed renamable $s0, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @a)
+    $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc
+
+...
+
+# CHECK-LABEL: bb.0.entry:
+# CHECK: $sp = t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5, $r6, undef $r7, $r8, $r9, $r10, $r11
+# CHECK-NEXT:  $r0 = t2BICri $r0, 1, 14 /* CC::al */, $noreg, $noreg
+# CHECK-NEXT:  $sp = tSUBspi $sp, 34, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  VLSTM $sp, 14 /* CC::al */, $noreg, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $q0, implicit undef $q1, implicit undef $q2, implicit undef $q3, implicit undef $q4, implicit undef $q5, implicit undef $q6, implicit undef $q7
+# CHECK-NEXT:  $r1 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r2 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r5 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r6 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r7 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r8 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r9 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r10 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r11 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  t2MSR_M 3072, $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+# CHECK-NEXT:  tBLXNSr 14 /* CC::al */, $noreg, killed $r0, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
+# CHECK-NEXT:  $r12 = VMOVRS $s0, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  VLLDM $sp, 14 /* CC::al */, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
+# CHECK-NEXT:  $s0 = VMOVSR $r12, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $sp = tADDspi $sp, 34, 14 /* CC::al */, $noreg
+# CHECK-NEXT:  $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
+ 
\ No newline at end of file

diff  --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
index f3cab91301da..63cd2a6c0030 100644
--- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
+++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
@@ -62,7 +62,7 @@ body:             |
     $sp = tSUBspi $sp, 34, 14, $noreg
     VLSTM $sp, 14, $noreg
     tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp
-    VLLDM $sp, 14, $noreg
+    VLLDM $sp, 14, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
     $sp = tADDspi $sp, 34, 14, $noreg
     $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
     $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc


        


More information about the llvm-commits mailing list