[llvm] r269736 - [WebAssembly] Improve the precision of memory and side effect dependence tracking.

Dan Gohman via llvm-commits llvm-commits at lists.llvm.org
Mon May 16 21:05:34 PDT 2016


Author: djg
Date: Mon May 16 23:05:31 2016
New Revision: 269736

URL: http://llvm.org/viewvc/llvm-project?rev=269736&view=rev
Log:
[WebAssembly] Improve the precision of memory and side effect dependence tracking.

MachineInstr::isSafeToMove is more conservative than is needed here; use a
more explicit check that incorporates knowledge of some WebAssembly-specific
opcodes.
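
For readers skimming the patch, here is a minimal standalone sketch (not part
of this commit) of the conflict rules the new IsSafeToMove scan applies between
the definition being sunk and each intervening instruction. The names InstrInfo
and conflictsWith are invented for illustration; in the patch itself the three
flags are computed by Query().

#include <cassert>

// Per-instruction summary, mirroring the Read/Write/Effects flags that
// Query() computes in the patch.
struct InstrInfo {
  bool Read = false;    // may read memory
  bool Write = false;   // may write memory
  bool Effects = false; // has other unmodeled side effects
};

// True if sinking an instruction summarized by Def past an intervening
// instruction summarized by Mid could change observable behavior. This is
// the same gating applied per intervening instruction in the new IsSafeToMove.
static bool conflictsWith(const InstrInfo &Def, const InstrInfo &Mid) {
  if (Def.Effects && Mid.Effects)
    return true; // two effectful instructions can't be reordered
  if (Def.Read && Mid.Write)
    return true; // a load can't be moved past a store
  if (Def.Write && (Mid.Read || Mid.Write))
    return true; // a store can't be moved past a load or another store
  return false;
}

int main() {
  InstrInfo Load, Store, DivTrap;
  Load.Read = true;   // e.g. a plain load
  Store.Write = true; // e.g. a plain store
  // DivTrap: e.g. DIV_S_I32, which gets no flags per the whitelists below.
  assert(conflictsWith(Load, Store));     // a read can't sink past a write
  assert(!conflictsWith(DivTrap, Store)); // a trapping div can sink past a store
  return 0;
}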

Modified:
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
    llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp?rev=269736&r1=269735&r2=269736&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp Mon May 16 23:05:31 2016
@@ -86,6 +86,148 @@ static void ImposeStackOrdering(MachineI
                                              /*isImp=*/true));
 }
 
+// Determine whether a call to the callee referenced by
+// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
+// effects.
+static void QueryCallee(const MachineInstr *MI, unsigned CalleeOpNo,
+                        bool &Read, bool &Write, bool &Effects) {
+  const MachineOperand &MO = MI->getOperand(CalleeOpNo);
+  if (MO.isGlobal()) {
+    const Constant *GV = MO.getGlobal();
+    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+      if (!GA->isInterposable())
+        GV = GA->getAliasee();
+
+    if (const Function *F = dyn_cast<Function>(GV)) {
+      if (!F->doesNotThrow())
+        Effects = true;
+      if (F->doesNotAccessMemory())
+        return;
+      if (F->onlyReadsMemory()) {
+        Read = true;
+        return;
+      }
+    }
+  }
+
+  // Assume the worst.
+  Write = true;
+  Read = true;
+  Effects = true;
+}
+
+// Determine whether MI reads memory, writes memory, and/or has side
+// effects.
+static void Query(const MachineInstr *MI, AliasAnalysis &AA,
+                  bool &Read, bool &Write, bool &Effects) {
+  assert(!MI->isPosition());
+  assert(!MI->isTerminator());
+  assert(!MI->isDebugValue());
+
+  // Check for loads.
+  if (MI->mayLoad() && !MI->isInvariantLoad(&AA))
+    Read = true;
+
+  // Check for stores.
+  if (MI->mayStore())
+    Write = true;
+  else if (MI->hasOrderedMemoryRef()) {
+    switch (MI->getOpcode()) {
+    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
+    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
+    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
+    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
+    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
+    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
+    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
+    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+      // These instructions have hasUnmodeledSideEffects() returning true
+      // because they trap on overflow and on invalid inputs, so they can't be
+      // arbitrarily moved; however, hasOrderedMemoryRef() interprets this,
+      // plus their lack of memoperands, as a potential unknown memory reference.
+      break;
+    default:
+      // Record potential stores, unless it's a call, as calls are handled
+      // specially below.
+      if (!MI->isCall())
+        Write = true;
+      break;
+    }
+  }
+
+  // Check for side effects.
+  if (MI->hasUnmodeledSideEffects()) {
+    switch (MI->getOpcode()) {
+    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
+    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
+    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
+    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
+    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
+    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
+    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
+    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+      // These instructions have hasUnmodeledSideEffects() returning true
+      // because they trap on overflow and on invalid inputs, so they can't be
+      // arbitrarily moved; however, in the specific case of register stackifying,
+      // it is safe to move them because overflow and invalid are undefined behavior.
+      break;
+    default:
+      Effects = true;
+      break;
+    }
+  }
+
+  // Analyze calls.
+  if (MI->isCall()) {
+    switch (MI->getOpcode()) {
+    case WebAssembly::CALL_VOID:
+      QueryCallee(MI, 0, Read, Write, Effects);
+      break;
+    case WebAssembly::CALL_I32:
+    case WebAssembly::CALL_I64:
+    case WebAssembly::CALL_F32:
+    case WebAssembly::CALL_F64:
+      QueryCallee(MI, 1, Read, Write, Effects);
+      break;
+    case WebAssembly::CALL_INDIRECT_VOID:
+    case WebAssembly::CALL_INDIRECT_I32:
+    case WebAssembly::CALL_INDIRECT_I64:
+    case WebAssembly::CALL_INDIRECT_F32:
+    case WebAssembly::CALL_INDIRECT_F64:
+      Read = true;
+      Write = true;
+      Effects = true;
+      break;
+    default:
+      llvm_unreachable("unexpected call opcode");
+    }
+  }
+}
+
+// Test whether Def is safe and profitable to rematerialize.
+static bool ShouldRematerialize(const MachineInstr *Def, AliasAnalysis &AA,
+                                const WebAssemblyInstrInfo *TII) {
+  return Def->isAsCheapAsAMove() &&
+         TII->isTriviallyReMaterializable(Def, &AA);
+}
+
+/// Identify the definition for this register at this point.
+static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
+                                const MachineRegisterInfo &MRI,
+                                const LiveIntervals &LIS) {
+  // Most registers are in SSA form here so we try a quick MRI query first.
+  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
+    return Def;
+
+  // MRI doesn't know what the Def is. Try asking LIS.
+  if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
+          LIS.getInstructionIndex(*Insert)))
+    return LIS.getInstructionFromIndex(ValNo->def);
+
+  return nullptr;
+}
+
 // Test whether it's safe to move Def to just before Insert.
 // TODO: Compute memory dependencies in a way that doesn't require always
 // walking the block.
@@ -95,8 +237,6 @@ static bool IsSafeToMove(const MachineIn
                          AliasAnalysis &AA, const LiveIntervals &LIS,
                          const MachineRegisterInfo &MRI) {
   assert(Def->getParent() == Insert->getParent());
-  bool SawStore = false, SawSideEffects = false;
-  MachineBasicBlock::const_iterator D(Def), I(Insert);
 
   // Check for register dependencies.
   for (const MachineOperand &MO : Def->operands()) {
@@ -133,12 +273,30 @@ static bool IsSafeToMove(const MachineIn
       return false;
   }
 
-  SawStore = Def->isCall() || Def->mayStore();
-  // Check for memory dependencies and side effects.
-  for (--I; I != D; --I)
-    SawSideEffects |= !I->isSafeToMove(&AA, SawStore);
-  return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
-         !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+  bool Read = false, Write = false, Effects = false;
+  Query(Def, AA, Read, Write, Effects);
+
+  // If the instruction does not access memory and has no side effects, it has
+  // no additional dependencies.
+  if (!Read && !Write && !Effects)
+    return true;
+
+  // Scan through the intervening instructions between Def and Insert.
+  MachineBasicBlock::const_iterator D(Def), I(Insert);
+  for (--I; I != D; --I) {
+    bool InterveningRead = false;
+    bool InterveningWrite = false;
+    bool InterveningEffects = false;
+    Query(I, AA, InterveningRead, InterveningWrite, InterveningEffects);
+    if (Effects && InterveningEffects)
+      return false;
+    if (Read && InterveningWrite)
+      return false;
+    if (Write && (InterveningRead || InterveningWrite))
+      return false;
+  }
+
+  return true;
 }
 
 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
@@ -197,6 +355,14 @@ static unsigned GetTeeLocalOpcode(const
   llvm_unreachable("Unexpected register class");
 }
 
+// Shrink LI to its uses; if it becomes disconnected, split the pieces apart.
+static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
+  if (LIS.shrinkToUses(&LI)) {
+    SmallVector<LiveInterval*, 4> SplitLIs;
+    LIS.splitSeparateComponents(LI, SplitLIs);
+  }
+}
+
 /// A single-use def in the same block with no intervening memory or register
 /// dependencies; move the def down and nest it with the current instruction.
 static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
@@ -205,6 +371,8 @@ static MachineInstr *MoveForSingleUse(un
                                       MachineInstr *Insert, LiveIntervals &LIS,
                                       WebAssemblyFunctionInfo &MFI,
                                       MachineRegisterInfo &MRI) {
+  DEBUG(dbgs() << "Move for single use: "; Def->dump());
+
   MBB.splice(Insert, &MBB, Def);
   LIS.handleMove(*Def);
 
@@ -221,9 +389,11 @@ static MachineInstr *MoveForSingleUse(un
     // Tell LiveIntervals about the changes to the old register.
     LiveInterval &LI = LIS.getInterval(Reg);
     LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*Def).getRegSlot());
-    LIS.shrinkToUses(&LI);
+    ShrinkToUses(LI, LIS);
 
     MFI.stackifyVReg(NewReg);
+
+    DEBUG(dbgs() << " - Replaced register: "; Def->dump());
   }
 
   ImposeStackOrdering(Def);
@@ -238,6 +408,9 @@ RematerializeCheapDef(unsigned Reg, Mach
                       LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
                       MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII,
                       const WebAssemblyRegisterInfo *TRI) {
+  DEBUG(dbgs() << "Rematerializing cheap def: "; Def->dump());
+  DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
+
   unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
   TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
   Op.setReg(NewReg);
@@ -247,16 +420,19 @@ RematerializeCheapDef(unsigned Reg, Mach
   MFI.stackifyVReg(NewReg);
   ImposeStackOrdering(Clone);
 
+  DEBUG(dbgs() << " - Cloned to "; Clone->dump());
+
   // Shrink the interval.
   bool IsDead = MRI.use_empty(Reg);
   if (!IsDead) {
     LiveInterval &LI = LIS.getInterval(Reg);
-    LIS.shrinkToUses(&LI);
+    ShrinkToUses(LI, LIS);
     IsDead = !LI.liveAt(LIS.getInstructionIndex(*Def).getDeadSlot());
   }
 
   // If that was the last use of the original, delete the original.
   if (IsDead) {
+    DEBUG(dbgs() << " - Deleting original\n");
     SlotIndex Idx = LIS.getInstructionIndex(*Def).getRegSlot();
     LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
     LIS.removeInterval(Reg);
@@ -291,6 +467,8 @@ static MachineInstr *MoveAndTeeForMultiU
     unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
     MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
     MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
+  DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
+
   MBB.splice(Insert, &MBB, Def);
   LIS.handleMove(*Def);
   const auto *RegClass = MRI.getRegClass(Reg);
@@ -491,17 +669,9 @@ bool WebAssemblyRegStackify::runOnMachin
 
         // Identify the definition for this register at this point. Most
         // registers are in SSA form here so we try a quick MRI query first.
-        MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
-        if (!Def) {
-          // MRI doesn't know what the Def is. Try asking LIS.
-          const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
-              LIS.getInstructionIndex(*Insert));
-          if (!ValNo)
-            continue;
-          Def = LIS.getInstructionFromIndex(ValNo->def);
-          if (!Def)
-            continue;
-        }
+        MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
+        if (!Def)
+          continue;
 
         // Don't nest an INLINE_ASM def into anything, because we don't have
         // constraints for $pop outputs.
@@ -531,8 +701,7 @@ bool WebAssemblyRegStackify::runOnMachin
                        !TreeWalker.IsOnStack(Reg);
         if (CanMove && MRI.hasOneUse(Reg)) {
           Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
-        } else if (Def->isAsCheapAsAMove() &&
-                   TII->isTriviallyReMaterializable(Def, &AA)) {
+        } else if (ShouldRematerialize(Def, AA, TII)) {
           Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                          MRI, TII, TRI);
         } else if (CanMove &&

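The reg-stackify.ll changes below exercise the new call handling: a readnone
nounwind callee contributes no flags, a readonly callee contributes only a read
(so it can't sink past an intervening store), and anything else is treated as
the worst case. A rough standalone sketch of that mapping follows; the
CalleeKind enum is invented for illustration, and the Effects handling for
non-nounwind callees is omitted for brevity.

#include <cassert>

// Stand-in for inspecting the callee's attributes; QueryCallee in the patch
// looks at the Function's readnone/readonly attributes directly.
enum class CalleeKind { ReadNone, ReadOnly, Unknown };

// Sketch of QueryCallee's attribute-to-flags mapping (exception handling for
// callees that may throw is omitted here).
static void queryCallee(CalleeKind K, bool &Read, bool &Write, bool &Effects) {
  if (K == CalleeKind::ReadNone)
    return; // no memory access recorded
  if (K == CalleeKind::ReadOnly) {
    Read = true; // may read memory, but not write it
    return;
  }
  // Unknown or indirect callee: assume the worst.
  Read = Write = Effects = true;
}

int main() {
  bool R = false, W = false, E = false;
  queryCallee(CalleeKind::ReadOnly, R, W, E);
  // A readonly call reads memory, so it can't be sunk past a store; this is
  // why no_sink_readonly_call below returns a register rather than a stack value.
  assert(R && !W && !E);
  return 0;
}
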
Modified: llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll?rev=269736&r1=269735&r2=269736&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/reg-stackify.ll Mon May 16 23:05:31 2016
@@ -44,6 +44,38 @@ define i32 @yes1(i32* %q) {
   ret i32 %t
 }
 
+; Yes because an operation that traps only on undefined behavior can be sunk
+; past a store.
+
+; CHECK-LABEL: sink_trap:
+; CHECK: return $pop0{{$}}
+define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
+  %t = sdiv i32 %x, %y
+  store volatile i32 0, i32* %p
+  ret i32 %t
+}
+
+; Yes because the call is readnone.
+
+; CHECK-LABEL: sink_readnone_call:
+; CHECK: return $pop0{{$}}
+declare i32 @readnone_callee() readnone nounwind
+define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
+  %t = call i32 @readnone_callee()
+  store volatile i32 0, i32* %p
+  ret i32 %t
+}
+
+; No because the call is readonly and there's an intervening store.
+
+; CHECK-LABEL: no_sink_readonly_call:
+; CHECK: return ${{[0-9]+}}{{$}}
+declare i32 @readonly_callee() readonly nounwind
+define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
+  %t = call i32 @readonly_callee()
+  store i32 0, i32* %p
+  ret i32 %t
+}
+
 ; Don't schedule stack uses into the stack. To reduce register pressure, the
 ; scheduler might be tempted to move the definition of $2 down. However, this
 ; would risk getting incorrect liveness if the instructions are later
@@ -69,7 +101,7 @@ define i32 @yes1(i32* %q) {
 ; CHECK-NEXT: br_if       0, $pop8{{$}}
 ; CHECK-NEXT: i32.const   $push9=, 0{{$}}
 ; CHECK-NEXT: return      $pop9{{$}}
-; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: .LBB7_2:
 ; CHECK-NEXT: end_block{{$}}
 ; CHECK-NEXT: i32.const   $push14=, 1{{$}}
 ; CHECK-NEXT: return      $pop14{{$}}
@@ -103,7 +135,7 @@ false:
 ; CHECK-NEXT: i32.lt_u    $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
 ; CHECK-NEXT: br_if       0, $pop[[NUM3]]{{$}}
 ; CHECK-NEXT: i32.store   $discard=, 0($2), $3{{$}}
-; CHECK-NEXT: .LBB5_3:
+; CHECK-NEXT: .LBB8_3:
 ; CHECK-NEXT: end_block{{$}}
 ; CHECK-NEXT: return{{$}}
 define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
