[llvm] r269736 - [WebAssembly] Improve the precision of memory and side effect dependence tracking.
Dan Gohman via llvm-commits
llvm-commits at lists.llvm.org
Mon May 16 21:05:34 PDT 2016
Author: djg
Date: Mon May 16 23:05:31 2016
New Revision: 269736
URL: http://llvm.org/viewvc/llvm-project?rev=269736&view=rev
Log:
[WebAssembly] Improve the precision of memory and side effect dependence tracking.
MachineInstr::isSafeToMove is more conservative than is needed here;
use a more explicit check, and incorporate knowledge of some
WebAssembly-specific opcodes.
Modified:
llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll
Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp?rev=269736&r1=269735&r2=269736&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp Mon May 16 23:05:31 2016
@@ -86,6 +86,148 @@ static void ImposeStackOrdering(MachineI
/*isImp=*/true));
}
+// Determine whether a call to the callee referenced by
+// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
+// effects.
+//
+// Read, Write, and Effects are only ever set to true here, never cleared,
+// so callers can accumulate results across multiple queries.
+static void QueryCallee(const MachineInstr *MI, unsigned CalleeOpNo,
+ bool &Read, bool &Write, bool &Effects) {
+ const MachineOperand &MO = MI->getOperand(CalleeOpNo);
+ if (MO.isGlobal()) {
+ const Constant *GV = MO.getGlobal();
+ // Look through aliases that cannot be interposed to reach the
+ // underlying function definition.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (!GA->isInterposable())
+ GV = GA->getAliasee();
+
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ // A callee that may unwind (throw) has observable side effects.
+ if (!F->doesNotThrow())
+ Effects = true;
+ // readnone: accesses no memory at all; nothing more to record.
+ if (F->doesNotAccessMemory())
+ return;
+ // readonly: reads memory but never writes it.
+ if (F->onlyReadsMemory()) {
+ Read = true;
+ return;
+ }
+ }
+ }
+
+ // Assume the worst.
+ Write = true;
+ Read = true;
+ Effects = true;
+}
+
+// Determine whether MI reads memory, writes memory, and/or has side
+// effects.
+//
+// Like QueryCallee, this only ever sets the output flags to true; it never
+// clears a flag a previous query set.
+static void Query(const MachineInstr *MI, AliasAnalysis &AA,
+ bool &Read, bool &Write, bool &Effects) {
+ // Callers are expected to have filtered out positions, terminators, and
+ // debug values before querying.
+ assert(!MI->isPosition());
+ assert(!MI->isTerminator());
+ assert(!MI->isDebugValue());
+
+ // Check for loads.
+ if (MI->mayLoad() && !MI->isInvariantLoad(&AA))
+ Read = true;
+
+ // Check for stores.
+ if (MI->mayStore())
+ Write = true;
+ else if (MI->hasOrderedMemoryRef()) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ // These instructions have hasUnmodeledSideEffects() returning true
+ // because they trap on overflow and invalid so they can't be arbitrarily
+ // moved, however hasOrderedMemoryRef() interprets this plus their lack
+ // of memoperands as having a potential unknown memory reference.
+ break;
+ default:
+ // Record potential stores, unless it's a call, as calls are handled
+ // specially below.
+ if (!MI->isCall())
+ Write = true;
+ break;
+ }
+ }
+
+ // Check for side effects.
+ if (MI->hasUnmodeledSideEffects()) {
+ // Same opcode list as above: trapping div/rem/trunc are exempted.
+ switch (MI->getOpcode()) {
+ case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ // These instructions have hasUnmodeledSideEffects() returning true
+ // because they trap on overflow and invalid so they can't be arbitrarily
+ // moved, however in the specific case of register stackifying, it is safe
+ // to move them because overflow and invalid are Undefined Behavior.
+ break;
+ default:
+ Effects = true;
+ break;
+ }
+ }
+
+ // Analyze calls.
+ if (MI->isCall()) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::CALL_VOID:
+ // Callee is operand 0 (no result register precedes it).
+ QueryCallee(MI, 0, Read, Write, Effects);
+ break;
+ case WebAssembly::CALL_I32:
+ case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_F32:
+ case WebAssembly::CALL_F64:
+ // Operand 0 is the result def; the callee is operand 1.
+ QueryCallee(MI, 1, Read, Write, Effects);
+ break;
+ case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F64:
+ // Indirect calls have an unknown callee; assume the worst.
+ Read = true;
+ Write = true;
+ Effects = true;
+ break;
+ default:
+ llvm_unreachable("unexpected call opcode");
+ }
+ }
+}
+
+// Test whether Def is safe and profitable to rematerialize.
+// Profitable: the instruction is as cheap as a register move.
+// Safe: TargetInstrInfo deems it trivially rematerializable (no
+// side effects or non-constant inputs, per the AA-assisted check).
+static bool ShouldRematerialize(const MachineInstr *Def, AliasAnalysis &AA,
+ const WebAssemblyInstrInfo *TII) {
+ return Def->isAsCheapAsAMove() &&
+ TII->isTriviallyReMaterializable(Def, &AA);
+}
+
+/// Identify the definition for this register at this point.
+/// Returns null if no defining instruction can be found, in which case the
+/// caller should conservatively give up on this register.
+static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals &LIS)
+{
+ // Most registers are in SSA form here so we try a quick MRI query first.
+ if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
+ return Def;
+
+ // MRI doesn't know what the Def is. Try asking LIS for the value number
+ // reaching the insertion point.
+ if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
+ LIS.getInstructionIndex(*Insert)))
+ return LIS.getInstructionFromIndex(ValNo->def);
+
+ return nullptr;
+}
+
// Test whether it's safe to move Def to just before Insert.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
@@ -95,8 +237,6 @@ static bool IsSafeToMove(const MachineIn
AliasAnalysis &AA, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
assert(Def->getParent() == Insert->getParent());
- bool SawStore = false, SawSideEffects = false;
- MachineBasicBlock::const_iterator D(Def), I(Insert);
// Check for register dependencies.
for (const MachineOperand &MO : Def->operands()) {
@@ -133,12 +273,30 @@ static bool IsSafeToMove(const MachineIn
return false;
}
- SawStore = Def->isCall() || Def->mayStore();
- // Check for memory dependencies and side effects.
- for (--I; I != D; --I)
- SawSideEffects |= !I->isSafeToMove(&AA, SawStore);
- return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
- !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+ bool Read = false, Write = false, Effects = false;
+ Query(Def, AA, Read, Write, Effects);
+
+ // If the instruction does not access memory and has no side effects, it has
+ // no additional dependencies.
+ if (!Read && !Write && !Effects)
+ return true;
+
+ // Scan through the intervening instructions between Def and Insert.
+ MachineBasicBlock::const_iterator D(Def), I(Insert);
+ for (--I; I != D; --I) {
+ bool InterveningRead = false;
+ bool InterveningWrite = false;
+ bool InterveningEffects = false;
+ Query(I, AA, InterveningRead, InterveningWrite, InterveningEffects);
+ if (Effects && InterveningEffects)
+ return false;
+ if (Read && InterveningWrite)
+ return false;
+ if (Write && (InterveningRead || InterveningWrite))
+ return false;
+ }
+
+ return true;
}
/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
@@ -197,6 +355,14 @@ static unsigned GetTeeLocalOpcode(const
llvm_unreachable("Unexpected register class");
}
+// Shrink LI to its uses, cleaning up LI.
+// If shrinking separated the interval into disconnected components,
+// split them into distinct live intervals so LIS stays consistent.
+static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
+ if (LIS.shrinkToUses(&LI)) {
+ SmallVector<LiveInterval*, 4> SplitLIs;
+ LIS.splitSeparateComponents(LI, SplitLIs);
+ }
+}
+
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
@@ -205,6 +371,8 @@ static MachineInstr *MoveForSingleUse(un
MachineInstr *Insert, LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI) {
+ DEBUG(dbgs() << "Move for single use: "; Def->dump());
+
MBB.splice(Insert, &MBB, Def);
LIS.handleMove(*Def);
@@ -221,9 +389,11 @@ static MachineInstr *MoveForSingleUse(un
// Tell LiveIntervals about the changes to the old register.
LiveInterval &LI = LIS.getInterval(Reg);
LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*Def).getRegSlot());
- LIS.shrinkToUses(&LI);
+ ShrinkToUses(LI, LIS);
MFI.stackifyVReg(NewReg);
+
+ DEBUG(dbgs() << " - Replaced register: "; Def->dump());
}
ImposeStackOrdering(Def);
@@ -238,6 +408,9 @@ RematerializeCheapDef(unsigned Reg, Mach
LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII,
const WebAssemblyRegisterInfo *TRI) {
+ DEBUG(dbgs() << "Rematerializing cheap def: "; Def->dump());
+ DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
+
unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
Op.setReg(NewReg);
@@ -247,16 +420,19 @@ RematerializeCheapDef(unsigned Reg, Mach
MFI.stackifyVReg(NewReg);
ImposeStackOrdering(Clone);
+ DEBUG(dbgs() << " - Cloned to "; Clone->dump());
+
// Shrink the interval.
bool IsDead = MRI.use_empty(Reg);
if (!IsDead) {
LiveInterval &LI = LIS.getInterval(Reg);
- LIS.shrinkToUses(&LI);
+ ShrinkToUses(LI, LIS);
IsDead = !LI.liveAt(LIS.getInstructionIndex(*Def).getDeadSlot());
}
// If that was the last use of the original, delete the original.
if (IsDead) {
+ DEBUG(dbgs() << " - Deleting original\n");
SlotIndex Idx = LIS.getInstructionIndex(*Def).getRegSlot();
LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
LIS.removeInterval(Reg);
@@ -291,6 +467,8 @@ static MachineInstr *MoveAndTeeForMultiU
unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
+ DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
+
MBB.splice(Insert, &MBB, Def);
LIS.handleMove(*Def);
const auto *RegClass = MRI.getRegClass(Reg);
@@ -491,17 +669,9 @@ bool WebAssemblyRegStackify::runOnMachin
// Identify the definition for this register at this point. Most
// registers are in SSA form here so we try a quick MRI query first.
- MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
- if (!Def) {
- // MRI doesn't know what the Def is. Try asking LIS.
- const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
- LIS.getInstructionIndex(*Insert));
- if (!ValNo)
- continue;
- Def = LIS.getInstructionFromIndex(ValNo->def);
- if (!Def)
- continue;
- }
+ MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
+ if (!Def)
+ continue;
// Don't nest an INLINE_ASM def into anything, because we don't have
// constraints for $pop outputs.
@@ -531,8 +701,7 @@ bool WebAssemblyRegStackify::runOnMachin
!TreeWalker.IsOnStack(Reg);
if (CanMove && MRI.hasOneUse(Reg)) {
Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
- } else if (Def->isAsCheapAsAMove() &&
- TII->isTriviallyReMaterializable(Def, &AA)) {
+ } else if (ShouldRematerialize(Def, AA, TII)) {
Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
MRI, TII, TRI);
} else if (CanMove &&
Modified: llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll?rev=269736&r1=269735&r2=269736&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll Mon May 16 23:05:31 2016
@@ -44,6 +44,38 @@ define i32 @yes1(i32* %q) {
ret i32 %t
}
+; Yes because undefined behavior can be sunk past a store.
+
+; CHECK-LABEL: sink_trap:
+; CHECK: return $pop0{{$}}
+define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
+ %t = sdiv i32 %x, %y
+ store volatile i32 0, i32* %p
+ ret i32 %t
+}
+
+; Yes because the call is readnone.
+
+; CHECK-LABEL: sink_readnone_call:
+; CHECK: return $pop0{{$}}
+declare i32 @readnone_callee() readnone nounwind
+define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
+ %t = call i32 @readnone_callee()
+ store volatile i32 0, i32* %p
+ ret i32 %t
+}
+
+; No because the call is readonly and there's an intervening store.
+
+; CHECK-LABEL: no_sink_readonly_call:
+; CHECK: return ${{[0-9]+}}{{$}}
+declare i32 @readonly_callee() readonly nounwind
+define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
+ %t = call i32 @readonly_callee()
+ store i32 0, i32* %p
+ ret i32 %t
+}
+
; Don't schedule stack uses into the stack. To reduce register pressure, the
; scheduler might be tempted to move the definition of $2 down. However, this
; would risk getting incorrect liveness if the instructions are later
@@ -69,7 +101,7 @@ define i32 @yes1(i32* %q) {
; CHECK-NEXT: br_if 0, $pop8{{$}}
; CHECK-NEXT: i32.const $push9=, 0{{$}}
; CHECK-NEXT: return $pop9{{$}}
-; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: i32.const $push14=, 1{{$}}
; CHECK-NEXT: return $pop14{{$}}
@@ -103,7 +135,7 @@ false:
; CHECK-NEXT: i32.lt_u $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
; CHECK-NEXT: br_if 0, $pop[[NUM3]]{{$}}
; CHECK-NEXT: i32.store $discard=, 0($2), $3{{$}}
-; CHECK-NEXT: .LBB5_3:
+; CHECK-NEXT: .LBB8_3:
; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
More information about the llvm-commits
mailing list