[llvm] r258781 - [WebAssembly] Optimize memcpy/memmove/memset calls.

Dan Gohman via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 25 20:01:11 PST 2016


Author: djg
Date: Mon Jan 25 22:01:11 2016
New Revision: 258781

URL: http://llvm.org/viewvc/llvm-project?rev=258781&view=rev
Log:
[WebAssembly] Optimize memcpy/memmove/memset calls.

These calls return their first argument, but because LLVM uses an intrinsic
with a void return type, they can't use the returned attribute. Generalize
the store results pass to optimize these calls too.

Added:
    llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll
Modified:
    llvm/trunk/lib/Target/WebAssembly/README.txt
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
    llvm/trunk/test/CodeGen/WebAssembly/global.ll

Modified: llvm/trunk/lib/Target/WebAssembly/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/README.txt?rev=258781&r1=258780&r2=258781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/README.txt (original)
+++ llvm/trunk/lib/Target/WebAssembly/README.txt Mon Jan 25 22:01:11 2016
@@ -78,11 +78,6 @@ stores.
 
 //===---------------------------------------------------------------------===//
 
-Memset/memcpy/memmove should be marked with the "returned" attribute somehow,
-even when they are translated through intrinsics.
-
-//===---------------------------------------------------------------------===//
-
 Consider implementing optimizeSelect, optimizeCompareInstr, optimizeCondBranch,
 optimizeLoadInstr, and/or getMachineCombinerPatterns.
 

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp?rev=258781&r1=258780&r2=258781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp Mon Jan 25 22:01:11 2016
@@ -15,7 +15,10 @@
 #include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "wasm-peephole"
@@ -28,6 +31,7 @@ class WebAssemblyPeephole final : public
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
@@ -44,11 +48,36 @@ FunctionPass *llvm::createWebAssemblyPee
   return new WebAssemblyPeephole();
 }
 
-bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+/// If desirable, rewrite NewReg to a discard register.
+static bool MaybeRewriteToDiscard(unsigned OldReg, unsigned NewReg,
+                                  MachineOperand &MO,
+                                  WebAssemblyFunctionInfo &MFI,
+                                  MachineRegisterInfo &MRI) {
   bool Changed = false;
+  // TODO: Handle SP/physregs
+  if (OldReg == NewReg && TargetRegisterInfo::isVirtualRegister(NewReg)) {
+    Changed = true;
+    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+    MO.setReg(NewReg);
+    MO.setIsDead();
+    MFI.stackifyVReg(NewReg);
+    MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
+  }
+  return Changed;
+}
+
+bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Store Results **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  const WebAssemblyTargetLowering &TLI =
+      *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
+  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  bool Changed = false;
 
   for (auto &MBB : MF)
     for (auto &MI : MBB)
@@ -69,17 +98,33 @@ bool WebAssemblyPeephole::runOnMachineFu
         // can use $discard instead.
         MachineOperand &MO = MI.getOperand(0);
         unsigned OldReg = MO.getReg();
-        // TODO: Handle SP/physregs
-        if (OldReg ==
-                MI.getOperand(WebAssembly::StoreValueOperandNo).getReg() &&
-            TargetRegisterInfo::isVirtualRegister(
-                MI.getOperand(WebAssembly::StoreValueOperandNo).getReg())) {
-          Changed = true;
-          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
-          MO.setReg(NewReg);
-          MO.setIsDead();
-          MFI.stackifyVReg(NewReg);
-          MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
+        unsigned NewReg =
+            MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
+        Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI);
+        break;
+      }
+      case WebAssembly::CALL_I32:
+      case WebAssembly::CALL_I64: {
+        MachineOperand &Op1 = MI.getOperand(1);
+        if (Op1.isSymbol()) {
+          StringRef Name(Op1.getSymbolName());
+          if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
+              Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
+              Name == TLI.getLibcallName(RTLIB::MEMSET)) {
+            LibFunc::Func Func;
+            if (LibInfo.getLibFunc(Name, Func)) {
+              if (!MI.getOperand(2).isReg())
+                report_fatal_error(
+                    "Call to builtin function with wrong signature");
+              MachineOperand &MO = MI.getOperand(0);
+              unsigned OldReg = MO.getReg();
+              unsigned NewReg = MI.getOperand(2).getReg();
+              if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg))
+                report_fatal_error(
+                    "Call to builtin function with wrong signature");
+              Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI);
+            }
+          }
         }
       }
       }

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp?rev=258781&r1=258780&r2=258781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp Mon Jan 25 22:01:11 2016
@@ -17,12 +17,18 @@
 /// potentially also exposing the store to register stackifying. These both can
 /// reduce get_local/set_local traffic.
 ///
+/// This pass also performs this optimization for memcpy, memmove, and memset
+/// calls, since the LLVM intrinsics for these return void so they can't use the
+/// returned attribute and consequently aren't handled by the OptimizeReturned
+/// pass.
+///
 //===----------------------------------------------------------------------===//
 
 #include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -49,6 +55,7 @@ public:
     AU.addPreserved<MachineBlockFrequencyInfo>();
     AU.addRequired<MachineDominatorTree>();
     AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
@@ -63,6 +70,40 @@ FunctionPass *llvm::createWebAssemblySto
   return new WebAssemblyStoreResults();
 }
 
+// Replace uses of FromReg with ToReg if they are dominated by MI.
+static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
+                                 unsigned FromReg, unsigned ToReg,
+                                 const MachineRegisterInfo &MRI,
+                                 MachineDominatorTree &MDT) {
+  bool Changed = false;
+  for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+    MachineOperand &O = *I++;
+    MachineInstr *Where = O.getParent();
+    if (Where->getOpcode() == TargetOpcode::PHI) {
+      // PHIs use their operands on their incoming CFG edges rather than
+      // in their parent blocks. Get the basic block paired with this use
+      // of FromReg and check that MI's block dominates it.
+      MachineBasicBlock *Pred =
+          Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
+      if (!MDT.dominates(&MBB, Pred))
+        continue;
+    } else {
+      // For a non-PHI, check that MI dominates the instruction in the
+      // normal way.
+      if (&MI == Where || !MDT.dominates(&MI, Where))
+        continue;
+    }
+    Changed = true;
+    DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from "
+                 << MI << "\n");
+    O.setReg(ToReg);
+    // If the store's def was previously dead, it is no longer. But the
+    // dead flag shouldn't be set yet.
+    assert(!MI.getOperand(0).isDead() && "Unexpected dead flag");
+  }
+  return Changed;
+}
+
 bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
   DEBUG({
     dbgs() << "********** Store Results **********\n"
@@ -71,6 +112,9 @@ bool WebAssemblyStoreResults::runOnMachi
 
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  const WebAssemblyTargetLowering &TLI =
+      *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
+  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   bool Changed = false;
 
   assert(MRI.isSSA() && "StoreResults depends on SSA form");
@@ -89,36 +133,38 @@ bool WebAssemblyStoreResults::runOnMachi
       case WebAssembly::STORE_F32:
       case WebAssembly::STORE_F64:
       case WebAssembly::STORE_I32:
-      case WebAssembly::STORE_I64:
+      case WebAssembly::STORE_I64: {
         unsigned ToReg = MI.getOperand(0).getReg();
         unsigned FromReg =
             MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
-        for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
-          MachineOperand &O = *I++;
-          MachineInstr *Where = O.getParent();
-          if (Where->getOpcode() == TargetOpcode::PHI) {
-            // PHIs use their operands on their incoming CFG edges rather than
-            // in their parent blocks. Get the basic block paired with this use
-            // of FromReg and check that MI's block dominates it.
-            MachineBasicBlock *Pred =
-                Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
-            if (!MDT.dominates(&MBB, Pred))
-              continue;
-          } else {
-            // For a non-PHI, check that MI dominates the instruction in the
-            // normal way.
-            if (&MI == Where || !MDT.dominates(&MI, Where))
-              continue;
+        Changed |= ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
+        break;
+      }
+      case WebAssembly::CALL_I32:
+      case WebAssembly::CALL_I64: {
+        MachineOperand &Op1 = MI.getOperand(1);
+        if (Op1.isSymbol()) {
+          StringRef Name(Op1.getSymbolName());
+          if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
+              Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
+              Name == TLI.getLibcallName(RTLIB::MEMSET)) {
+            LibFunc::Func Func;
+            if (LibInfo.getLibFunc(Name, Func)) {
+              if (!MI.getOperand(2).isReg())
+                report_fatal_error(
+                    "Call to builtin function with wrong signature");
+              unsigned FromReg = MI.getOperand(2).getReg();
+              unsigned ToReg = MI.getOperand(0).getReg();
+              if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
+                report_fatal_error(
+                    "Call to builtin function with wrong signature");
+              Changed |=
+                  ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
+            }
           }
-          Changed = true;
-          DEBUG(dbgs() << "Setting operand " << O << " in " << *Where
-                       << " from " << MI << "\n");
-          O.setReg(ToReg);
-          // If the store's def was previously dead, it is no longer. But the
-          // dead flag shouldn't be set yet.
-          assert(!MI.getOperand(0).isDead() && "Dead flag set on store result");
         }
       }
+      }
   }
 
   return Changed;

Modified: llvm/trunk/test/CodeGen/WebAssembly/global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/global.ll?rev=258781&r1=258780&r2=258781&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/global.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/global.ll Mon Jan 25 22:01:11 2016
@@ -21,8 +21,8 @@ define i32 @foo() {
 ; CHECK-LABEL: call_memcpy:
 ; CHECK-NEXT: .param          i32, i32, i32{{$}}
 ; CHECK-NEXT: .result         i32{{$}}
-; CHECK-NEXT: i32.call        $discard=, memcpy@FUNCTION, $0, $1, $2{{$}}
-; CHECK-NEXT: return          $0{{$}}
+; CHECK-NEXT: i32.call        $push0=, memcpy@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: return          $pop0{{$}}
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
 define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
   tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)

Added: llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll?rev=258781&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll Mon Jan 25 22:01:11 2016
@@ -0,0 +1,60 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test memcpy, memmove, and memset intrinsics.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
+
+; Test that return values are optimized.
+
+; CHECK-LABEL: copy_yes:
+; CHECK:      i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: return   $pop0{{$}}
+define i8* @copy_yes(i8* %dst, i8* %src, i32 %len) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
+  ret i8* %dst
+}
+
+; CHECK-LABEL: copy_no:
+; CHECK:      i32.call $discard=, memcpy@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: return{{$}}
+define void @copy_no(i8* %dst, i8* %src, i32 %len) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: move_yes:
+; CHECK:      i32.call $push0=, memmove@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: return   $pop0{{$}}
+define i8* @move_yes(i8* %dst, i8* %src, i32 %len) {
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
+  ret i8* %dst
+}
+
+; CHECK-LABEL: move_no:
+; CHECK:      i32.call $discard=, memmove@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: return{{$}}
+define void @move_no(i8* %dst, i8* %src, i32 %len) {
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: set_yes:
+; CHECK:      i32.call $push0=, memset@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: return   $pop0{{$}}
+define i8* @set_yes(i8* %dst, i8 %src, i32 %len) {
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false)
+  ret i8* %dst
+}
+
+; CHECK-LABEL: set_no:
+; CHECK:      i32.call $discard=, memset@FUNCTION, $0, $1, $2{{$}}
+; CHECK-NEXT: return{{$}}
+define void @set_no(i8* %dst, i8 %src, i32 %len) {
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false)
+  ret void
+}




More information about the llvm-commits mailing list