[llvm] adf7973 - [ARM] Describe defs/uses of VLLDM and VLSTM
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 23 08:29:27 PDT 2020
Author: Momchil Velikov
Date: 2020-06-23T16:04:23+01:00
New Revision: adf7973fd35d899a1500d2f379b4e6874b4de20f
URL: https://github.com/llvm/llvm-project/commit/adf7973fd35d899a1500d2f379b4e6874b4de20f
DIFF: https://github.com/llvm/llvm-project/commit/adf7973fd35d899a1500d2f379b4e6874b4de20f.diff
LOG: [ARM] Describe defs/uses of VLLDM and VLSTM
The VLLDM and VLSTM instructions are incompletely specified. They
(potentially) write (or read, respectively) registers Q0-Q7, VPR, and
FPSCR, but the compiler is unaware of it.
In the new test case `cmse-vlldm-no-reorder.ll` case the compiler
missed an anti-dependency and reordered a `VLLDM` ahead of the
instruction, which stashed the return value from the non-secure call,
effectively clobbering said value.
This test case does not fail with upstream LLVM, because of scheduling
differences and I couldn't find a test case for the VLSTM either.
Differential Revision: https://reviews.llvm.org/D81586
Added:
llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll
llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir
Modified:
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARMInstrVFP.td
llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 49056d783028..48622aae3cb4 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -85,12 +85,15 @@ namespace {
const BitVector &ClearRegs);
void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs,
SmallVectorImpl<unsigned> &AvailableRegs);
void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs,
SmallVectorImpl<unsigned> &ScratchRegs);
void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc &DL);
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs);
void CMSERestoreFPRegs(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &AvailableRegs);
@@ -1202,17 +1205,17 @@ ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
void ARMExpandPseudo::CMSESaveClearFPRegs(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- SmallVectorImpl<unsigned> &ScratchRegs) {
+ const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
if (STI->hasV8_1MMainlineOps())
- CMSESaveClearFPRegsV81(MBB, MBBI, DL);
+ CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
else
- CMSESaveClearFPRegsV8(MBB, MBBI, DL, ScratchRegs);
+ CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
}
// Save and clear FP registers if present
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- SmallVectorImpl<unsigned> &ScratchRegs) {
+ const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
if (!STI->hasFPRegs())
return;
@@ -1271,7 +1274,11 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
// Lazy store all fp registers to the stack
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL));
+ for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+ ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+ VLSTM.addReg(R, RegState::Implicit |
+ (LiveRegs.contains(R) ? 0 : RegState::Undef));
// Restore all arguments
for (const auto &Regs : ClearedFPRegs) {
@@ -1343,7 +1350,8 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc &DL) {
+ DebugLoc &DL,
+ const LivePhysRegs &LiveRegs) {
BitVector ClearRegs(32, true);
bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
@@ -1358,9 +1366,13 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL));
// Lazy store all FP registers to the stack
- BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
- .addReg(ARM::SP)
- .add(predOps(ARMCC::AL));
+ MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+ ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+ VLSTM.addReg(R, RegState::Implicit |
+ (LiveRegs.contains(R) ? 0 : RegState::Undef));
} else {
// Push all the callee-saved registers (s16-s31).
MachineInstrBuilder VPUSH =
@@ -1754,13 +1766,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int JumpReg,
- bool Thumb1Only) {
+ const LivePhysRegs &LiveRegs, bool Thumb1Only) {
const DebugLoc &DL = MBBI->getDebugLoc();
if (Thumb1Only) { // push Lo and Hi regs separately
MachineInstrBuilder PushMIB =
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
- for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg)
- PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+ for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ PushMIB.addReg(
+ Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+ }
// Thumb1 can only tPUSH low regs, so we copy the high regs to the low
// regs that we just saved and push the low regs again, taking care to
@@ -1773,7 +1787,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
if (JumpReg == LoReg)
continue;
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
- .addReg(HiReg, RegState::Undef)
+ .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
.add(predOps(ARMCC::AL));
--HiReg;
}
@@ -1791,19 +1805,21 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
- .addReg(ARM::R8)
+ .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
.add(predOps(ARMCC::AL));
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
.add(predOps(ARMCC::AL))
- .addReg(LoReg);
+ .addReg(LoReg, RegState::Kill);
}
} else { // push Lo and Hi registers with a single instruction
MachineInstrBuilder PushMIB =
BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
- for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
- PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+ for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
+ PushMIB.addReg(
+ Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+ }
}
}
@@ -1934,7 +1950,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::tBLXNS_CALL: {
DebugLoc DL = MBBI->getDebugLoc();
unsigned JumpReg = MBBI->getOperand(0).getReg();
- CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg,
+
+ // Figure out which registers are live at the point immediately before the
+ // call. When we indiscriminately push a set of registers, the live
+ // registers are added as ordinary use operands, whereas dead registers
+ // are "undef".
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(MBB);
+ for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
+ LiveRegs.stepBackward(MI);
+ LiveRegs.stepBackward(*MBBI);
+
+ CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
AFI->isThumb1OnlyFunction());
SmallVector<unsigned, 16> ClearRegs;
@@ -1971,7 +1998,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL));
}
- CMSESaveClearFPRegs(MBB, MBBI, DL,
+ CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
ClearRegs); // save+clear FP regs with ClearRegs
CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 5611ddb57541..c7d12b3164c2 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -287,7 +287,6 @@ def : MnemonicAlias<"vstm", "vstmia">;
//===----------------------------------------------------------------------===//
// Lazy load / store multiple Instructions
//
-let mayLoad = 1 in
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlldm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {
@@ -298,9 +297,9 @@ def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
let Inst{15-12} = 0;
let Inst{7-0} = 0;
let mayLoad = 1;
+ let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
}
-let mayStore = 1 in
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlstm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {
diff --git a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll
new file mode 100644
index 000000000000..dc986c4b30a0
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard %s -o - | \
+; RUN: FileCheck %s
+
+ at g = hidden local_unnamed_addr global float (...)* null, align 4
+ at a = hidden local_unnamed_addr global float 0.000000e+00, align 4
+
+define hidden void @f() local_unnamed_addr #0 {
+entry:
+ %0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
+ %call = tail call nnan ninf nsz float %0() #1
+ store float %call, float* @a, align 4
+ ret void
+}
+
+; CHECK: blxns r{{[0-9]+}}
+; CHECK: vmov r[[T:[0-9]+]], s0
+; CHECK: vlldm sp
+; CHECK: vmov s0, r[[T]]
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "cmse_nonsecure_call" }
diff --git a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir
new file mode 100644
index 000000000000..571ec0cec86c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir
@@ -0,0 +1,112 @@
+# RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard --run-pass=arm-pseudo %s -o - | \
+# RUN: FileCheck %s
+--- |
+ ; ModuleID = 'cmse-vlldm-no-reorder.ll'
+ source_filename = "cmse-vlldm-no-reorder.ll"
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8m.main"
+
+ @g = hidden local_unnamed_addr global float (...)* null, align 4
+ @a = hidden local_unnamed_addr global float 0.000000e+00, align 4
+
+ ; Function Attrs: nounwind
+ define hidden void @f() local_unnamed_addr #0 {
+ entry:
+ %0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
+ %call = tail call nnan ninf nsz float %0() #2
+ store float %call, float* @a, align 4
+ ret void
+ }
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ attributes #0 = { nounwind "target-cpu"="cortex-m33" }
+ attributes #1 = { nounwind }
+ attributes #2 = { nounwind "cmse_nonsecure_call" }
+
+...
+---
+name: f
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $r7, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ renamable $r0 = t2MOVi32imm @g
+ renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from `float ()** bitcast (float (...)** @g to float ()**)`)
+ tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
+ renamable $r0 = t2MOVi32imm @a
+ VSTRS killed renamable $s0, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @a)
+ $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc
+
+...
+
+# CHECK-LABEL: bb.0.entry:
+# CHECK: $sp = t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5, $r6, undef $r7, $r8, $r9, $r10, $r11
+# CHECK-NEXT: $r0 = t2BICri $r0, 1, 14 /* CC::al */, $noreg, $noreg
+# CHECK-NEXT: $sp = tSUBspi $sp, 34, 14 /* CC::al */, $noreg
+# CHECK-NEXT: VLSTM $sp, 14 /* CC::al */, $noreg, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $q0, implicit undef $q1, implicit undef $q2, implicit undef $q3, implicit undef $q4, implicit undef $q5, implicit undef $q6, implicit undef $q7
+# CHECK-NEXT: $r1 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r2 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r5 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r6 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r7 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r8 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r9 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r10 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r11 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: t2MSR_M 3072, $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+# CHECK-NEXT: tBLXNSr 14 /* CC::al */, $noreg, killed $r0, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
+# CHECK-NEXT: $r12 = VMOVRS $s0, 14 /* CC::al */, $noreg
+# CHECK-NEXT: VLLDM $sp, 14 /* CC::al */, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
+# CHECK-NEXT: $s0 = VMOVSR $r12, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $sp = tADDspi $sp, 34, 14 /* CC::al */, $noreg
+# CHECK-NEXT: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
+
\ No newline at end of file
diff --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
index f3cab91301da..63cd2a6c0030 100644
--- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
+++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
@@ -62,7 +62,7 @@ body: |
$sp = tSUBspi $sp, 34, 14, $noreg
VLSTM $sp, 14, $noreg
tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp
- VLLDM $sp, 14, $noreg
+ VLLDM $sp, 14, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
$sp = tADDspi $sp, 34, 14, $noreg
$sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
$sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc
More information about the llvm-commits
mailing list