[llvm] 83c26ea - [WebAssembly] Remove TEEs when dests are unstackified

Heejin Ahn via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 19 14:55:39 PDT 2020


Author: Heejin Ahn
Date: 2020-06-19T14:55:21-07:00
New Revision: 83c26eae234964fd96546265bcb94295f95617f6

URL: https://github.com/llvm/llvm-project/commit/83c26eae234964fd96546265bcb94295f95617f6
DIFF: https://github.com/llvm/llvm-project/commit/83c26eae234964fd96546265bcb94295f95617f6.diff

LOG: [WebAssembly] Remove TEEs when dests are unstackified

When created in RegStackify pass, `TEE` has two destinations, where
op0 is stackified and op1 is not. But it is possible that
op0 becomes unstackified in `fixUnwindMismatches` function in
CFGStackify pass when a nested try-catch-end is introduced, violating
the invariant of `TEE`s destinations.

In this case we convert the `TEE` into two `COPY`s, which will
eventually be resolved in ExplicitLocals.

Reviewed By: dschuff

Differential Revision: https://reviews.llvm.org/D81851

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
    llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index ddf2d8bdfc05..8cbfc98e8197 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -733,12 +733,30 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
   }
 }
 
+// Get the appropriate copy opcode for the given register class.
+static unsigned getCopyOpcode(const TargetRegisterClass *RC) {
+  if (RC == &WebAssembly::I32RegClass)
+    return WebAssembly::COPY_I32;
+  if (RC == &WebAssembly::I64RegClass)
+    return WebAssembly::COPY_I64;
+  if (RC == &WebAssembly::F32RegClass)
+    return WebAssembly::COPY_F32;
+  if (RC == &WebAssembly::F64RegClass)
+    return WebAssembly::COPY_F64;
+  if (RC == &WebAssembly::V128RegClass)
+    return WebAssembly::COPY_V128;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::COPY_EXNREF;
+  llvm_unreachable("Unexpected register class");
+}
+
 // When MBB is split into MBB and Split, we should unstackify defs in MBB that
 // have their uses in Split.
 static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
                                          MachineBasicBlock &Split,
                                          WebAssemblyFunctionInfo &MFI,
-                                         MachineRegisterInfo &MRI) {
+                                         MachineRegisterInfo &MRI,
+                                         const WebAssemblyInstrInfo &TII) {
   for (auto &MI : Split) {
     for (auto &MO : MI.explicit_uses()) {
       if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg()))
@@ -748,6 +766,47 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
           MFI.unstackifyVReg(MO.getReg());
     }
   }
+
+  // In RegStackify, when a register definition is used multiple times,
+  //    Reg = INST ...
+  //    INST ..., Reg, ...
+  //    INST ..., Reg, ...
+  //    INST ..., Reg, ...
+  //
+  // we introduce a TEE, which has the following form:
+  //    DefReg = INST ...
+  //    TeeReg, Reg = TEE_... DefReg
+  //    INST ..., TeeReg, ...
+  //    INST ..., Reg, ...
+  //    INST ..., Reg, ...
+  // with DefReg and TeeReg stackified but Reg not stackified.
+  //
+  // But the invariant that TeeReg should be stackified can be violated while we
+  // unstackify registers in the split BB above. In this case, we convert TEEs
+  // into two COPYs. This COPY will be eventually eliminated in ExplicitLocals.
+  //    DefReg = INST ...
+  //    TeeReg = COPY DefReg
+  //    Reg = COPY DefReg
+  //    INST ..., TeeReg, ...
+  //    INST ..., Reg, ...
+  //    INST ..., Reg, ...
+  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
+    MachineInstr &MI = *I++;
+    if (!WebAssembly::isTee(MI.getOpcode()))
+      continue;
+    Register TeeReg = MI.getOperand(0).getReg();
+    Register Reg = MI.getOperand(1).getReg();
+    Register DefReg = MI.getOperand(2).getReg();
+    if (!MFI.isVRegStackified(TeeReg)) {
+      // Now we are not using TEE anymore, so unstackify DefReg too
+      MFI.unstackifyVReg(DefReg);
+      unsigned CopyOpc = getCopyOpcode(MRI.getRegClass(DefReg));
+      BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), TeeReg)
+          .addReg(DefReg);
+      BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), Reg).addReg(DefReg);
+      MI.eraseFromParent();
+    }
+  }
 }
 
 bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
@@ -1068,7 +1127,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
     BrDest->insert(BrDest->end(), EndTry->removeFromParent());
     // Take out the handler body from EH pad to the new branch destination BB.
     BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end());
-    unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI);
+    unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI, TII);
     // Fix predecessor-successor relationship.
     BrDest->transferSuccessors(EHPad);
     EHPad->addSuccessor(BrDest);
@@ -1206,7 +1265,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
       // new nested continuation BB.
       NestedCont->splice(NestedCont->end(), MBB,
                          std::next(RangeEnd->getIterator()), MBB->end());
-      unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI);
+      unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI, TII);
       registerTryScope(NestedTry, NestedEndTry, NestedEHPad);
 
       // Fix predecessor-successor relationship.

diff  --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll
index fcc30466594e..6c7b8f9cdf8a 100644
--- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll
+++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling
 ; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -exception-model=wasm -mattr=+exception-handling | FileCheck %s --check-prefix=NOOPT
 ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS
 ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT-STAT
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
@@ -533,16 +534,56 @@ try.cont:                                         ; preds = %catch.start0
   ret i32 0
 }
 
+; Tests the case when TEE stackifies a register in RegStackify but it gets
+; unstackified in fixUnwindMismatches in CFGStackify.
+
+; NOSORT-LOCALS-LABEL: test8
+define void @test8(i32 %x) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
+bb0:
+  invoke void @foo()
+          to label %bb1 unwind label %catch.dispatch0
+
+bb1:                                              ; preds = %bb0
+  %t = add i32 %x, 4
+  ; This %addr is used in multiple places, so tee is introduced in RegStackify,
+  ; which stackifies the use of %addr in store instruction. A tee has two dest
+  ; registers, the first of which is stackified and the second is not.
+  ; But when we introduce a nested try-catch in fixUnwindMismatches in
+  ; CFGStackify, it is possible that we end up unstackifying the first dest
+  ; register. In that case, we convert that tee into a copy.
+  %addr = inttoptr i32 %t to i32*
+  %load = load i32, i32* %addr
+  %call = call i32 @baz()
+  %add = add i32 %load, %call
+  store i32 %add, i32* %addr
+  ret void
+; NOSORT-LOCALS:       i32.add
+; NOSORT-LOCALS-NOT:   local.tee
+; NOSORT-LOCALS-NEXT:  local.set
+
+catch.dispatch0:                                  ; preds = %bb0
+  %0 = catchswitch within none [label %catch.start0] unwind to caller
+
+catch.start0:                                     ; preds = %catch.dispatch0
+  %1 = catchpad within %0 [i8* null]
+  %2 = call i8* @llvm.wasm.get.exception(token %1)
+  %3 = call i32 @llvm.wasm.get.ehselector(token %1)
+  catchret from %1 to label %try.cont
+
+try.cont:                                         ; preds = %catch.start0
+  ret void
+}
+
 ; If not for the unwind destination mismatch, the LOOP marker here would have an
 ; i32 signature. But because we add a rethrow instruction at the end of the
 ; appendix block, now the LOOP marker does not have a signature (= has a void
 ; signature). Here the two calls two 'bar' are supposed to throw up to the
 ; caller, but incorrectly unwind to 'catch19' after linearizing the CFG.
 
-; NOSORT-LABEL: test8
+; NOSORT-LABEL: test9
 ; NOSORT: block
 ; NOSORT-NOT: loop      i32
-; NOSORT:   loop                                       # label40:
+; NOSORT:   loop                                       # label42:
 ; NOSORT:     try
 ; NOSORT:       call      foo
 ; --- Nested try/catch/end_try starts
@@ -550,18 +591,18 @@ try.cont:                                         ; preds = %catch.start0
 ; NOSORT:         call      bar
 ; NOSORT:         call      bar
 ; NOSORT:       catch     $[[REG:[0-9]+]]=
-; NOSORT:         br        1                          # 1: down to label41
+; NOSORT:         br        1                          # 1: down to label43
 ; NOSORT:       end_try
 ; --- Nested try/catch/end_try ends
 ; NOSORT:       return    {{.*}}
-; NOSORT:     catch     $drop=                         # catch21:
-; NOSORT:       br        1                            # 1: up to label40
-; NOSORT:     end_try                                  # label41:
+; NOSORT:     catch     $drop=                         # catch23:
+; NOSORT:       br        1                            # 1: up to label42
+; NOSORT:     end_try                                  # label43:
 ; NOSORT:   end_loop
 ; NOSORT: end_block
 ; NOSORT: rethrow   $[[REG]]                           # to caller
 
-define i32 @test8(i32* %p) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
+define i32 @test9(i32* %p) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 entry:
   store volatile i32 0, i32* %p
   br label %loop
@@ -595,7 +636,7 @@ try.cont:                                         ; preds = %catch.start
 ; - A may-throw instruction unwinds to an incorrect EH pad after linearizing the
 ;   CFG, when it is supposed to unwind to the caller.
 
-; NOSORT-LABEL: test9
+; NOSORT-LABEL: test10
 ; NOSORT: block
 ; NOSORT:   block
 ; NOSORT:     try
@@ -605,40 +646,40 @@ try.cont:                                         ; preds = %catch.start
 ; NOSORT:         try
 ; NOSORT:           call      bar
 ; NOSORT:         catch     $[[REG0:[0-9]+]]=
-; NOSORT:           br        2                        # 2: down to label45
+; NOSORT:           br        2                        # 2: down to label47
 ; NOSORT:         end_try
 ; --- Nested try/catch/end_try ends
-; NOSORT:         br        2                          # 2: down to label44
+; NOSORT:         br        2                          # 2: down to label46
 ; NOSORT:       catch     {{.*}}
 ; NOSORT:         block     i32
-; NOSORT:           br_on_exn   0, {{.*}}              # 0: down to label48
+; NOSORT:           br_on_exn   0, {{.*}}              # 0: down to label50
 ; --- Nested try/catch/end_try starts
 ; NOSORT:           try
-; NOSORT:             rethrow   {{.*}}                 # down to catch26
-; NOSORT:           catch     $[[REG1:[0-9]+]]=        # catch26:
-; NOSORT:             br        5                      # 5: down to label43
+; NOSORT:             rethrow   {{.*}}                 # down to catch28
+; NOSORT:           catch     $[[REG1:[0-9]+]]=        # catch28:
+; NOSORT:             br        5                      # 5: down to label45
 ; NOSORT:           end_try
 ; --- Nested try/catch/end_try ends
-; NOSORT:         end_block                            # label48:
+; NOSORT:         end_block                            # label50:
 ; NOSORT:         call      $drop=, __cxa_begin_catch
 ; --- Nested try/catch/end_try starts
 ; NOSORT:         try
 ; NOSORT:           call      __cxa_end_catch
 ; NOSORT:         catch     $[[REG1]]=
-; NOSORT:           br        4                        # 4: down to label43
+; NOSORT:           br        4                        # 4: down to label45
 ; NOSORT:         end_try
 ; --- Nested try/catch/end_try ends
-; NOSORT:         br        2                          # 2: down to label44
+; NOSORT:         br        2                          # 2: down to label46
 ; NOSORT:       end_try
 ; NOSORT:     catch     $[[REG0]]=
-; NOSORT:     end_try                                  # label45:
+; NOSORT:     end_try                                  # label47:
 ; NOSORT:     call      $drop=, __cxa_begin_catch
 ; NOSORT:     call      __cxa_end_catch
-; NOSORT:   end_block                                  # label44:
+; NOSORT:   end_block                                  # label46:
 ; NOSORT:   return
-; NOSORT: end_block                                    # label43:
+; NOSORT: end_block                                    # label45:
 ; NOSORT: rethrow   $[[REG1]]                          # to caller
-define void @test9() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
+define void @test10() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 bb0:
   invoke void @foo()
           to label %bb1 unwind label %catch.dispatch0
@@ -688,7 +729,7 @@ try.cont:                                         ; preds = %catch.start1, %catc
 ; NOOPT:   call      foo
 ; NOOPT: end_block
 ; NOOPT: return
-define void @test10(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
+define void @test11(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 entry:
   %tobool = icmp ne i32 %arg, 0
   br i1 %tobool, label %if.then, label %if.end
@@ -725,7 +766,7 @@ if.end:                                           ; preds = %cont, %catch.start,
 ; invoke.cont BB fall within try~end_try, but they shouldn't cause crashes or
 ; unwinding destination mismatches in CFGStackify.
 
-; NOSORT-LABEL: test11
+; NOSORT-LABEL: test12
 ; NOSORT: try
 ; NOSORT:   call  foo
 ; NOSORT:   call {{.*}} memcpy
@@ -735,7 +776,7 @@ if.end:                                           ; preds = %cont, %catch.start,
 ; NOSORT: catch
 ; NOSORT:   rethrow
 ; NOSORT: end_try
-define void @test11(i8* %a, i8* %b) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
+define void @test12(i8* %a, i8* %b) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 entry:
   %o = alloca %class.Object, align 1
   invoke void @foo()
@@ -759,11 +800,11 @@ ehcleanup:                                        ; preds = %entry
 ; 'nothrow_i32' and 'fun', because the return value of 'nothrow_i32' is
 ; stackified and pushed onto the stack to be consumed by the call to 'fun'.
 
-; CHECK-LABEL: test12
+; CHECK-LABEL: test13
 ; CHECK: try
 ; CHECK: call      $push{{.*}}=, nothrow_i32
 ; CHECK: call      fun, $pop{{.*}}
-define void @test12() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
+define void @test13() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 entry:
   %call = call i32 @nothrow_i32()
   invoke void @fun(i32 %call)
@@ -784,7 +825,7 @@ terminate:                                        ; preds = %entry
 ; This crashed on debug mode (= when NDEBUG is not defined) when the logic for
 ; computing the innermost region was not correct, in which a loop region
 ; contains an exception region. This should pass CFGSort without crashing.
-define void @test13() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
+define void @test14() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 entry:
   %e = alloca %class.MyClass, align 4
   br label %for.cond
@@ -935,7 +976,7 @@ try.cont:                                         ; preds = %catch.start, %for.e
 }
 
 ; Check if the unwind destination mismatch stats are correct
-; NOSORT-STAT: 16 wasm-cfg-stackify    - Number of EH pad unwind mismatches found
+; NOSORT-STAT: 17 wasm-cfg-stackify    - Number of EH pad unwind mismatches found
 
 declare void @foo()
 declare void @bar()


        


More information about the llvm-commits mailing list