[llvm] 0800a36 - Revert "[NVVMReflect] Force dead branch elimination in NVVMReflect (#81189)"

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 8 15:32:13 PST 2024


Author: Joseph Huber
Date: 2024-02-08T17:32:04-06:00
New Revision: 0800a36053943beabe1c3f98fe4ecccbc192a2a7

URL: https://github.com/llvm/llvm-project/commit/0800a36053943beabe1c3f98fe4ecccbc192a2a7
DIFF: https://github.com/llvm/llvm-project/commit/0800a36053943beabe1c3f98fe4ecccbc192a2a7.diff

LOG: Revert "[NVVMReflect] Force dead branch elimination in NVVMReflect (#81189)"

This reverts commit 9211e67da36782db44a46ccb9ac06734ccf2570f.

Summary:
This seemed to crash one one of the CUDA math tests. Revert until it can
be fixed.

Added: 
    

Modified: 
    llvm/docs/NVPTXUsage.rst
    llvm/lib/Target/NVPTX/NVVMReflect.cpp
    llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll

Removed: 
    llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll


################################################################################
diff  --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index b5e3918e56e940..22acc6c9cb37f5 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -296,11 +296,6 @@ pipeline, immediately after the link stage. The ``internalize`` pass is also
 recommended to remove unused math functions from the resulting PTX. For an
 input IR module ``module.bc``, the following compilation flow is recommended:
 
-The ``NVVMReflect`` pass will attempt to remove dead code even without
-optimizations. This allows potentially incompatible instructions to be avoided
-at all optimizations levels. This currently only works for simple conditionals
-like the above example.
-
 1. Save list of external functions in ``module.bc``
 2. Link ``module.bc`` with ``libdevice.compute_XX.YY.bc``
 3. Internalize all functions not in list from (1)

diff  --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 5283c2fff2c6c2..7d2678ae592748 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -20,7 +20,6 @@
 
 #include "NVPTX.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -37,8 +36,6 @@
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include <sstream>
 #include <string>
 #define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
@@ -90,7 +87,6 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) {
   }
 
   SmallVector<Instruction *, 4> ToRemove;
-  SmallVector<ICmpInst *, 4> ToSimplify;
 
   // Go through the calls in this function.  Each call to __nvvm_reflect or
   // llvm.nvvm.reflect should be a CallInst with a ConstantArray argument.
@@ -175,13 +171,6 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) {
     } else if (ReflectArg == "__CUDA_ARCH") {
       ReflectVal = SmVersion * 10;
     }
-
-    // If the immediate user is a simple comparison we want to simplify it.
-    // TODO: This currently does not handle switch instructions.
-    for (User *U : Call->users())
-      if (ICmpInst *I = dyn_cast<ICmpInst>(U))
-        ToSimplify.push_back(I);
-
     Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
     ToRemove.push_back(Call);
   }
@@ -189,57 +178,6 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) {
   for (Instruction *I : ToRemove)
     I->eraseFromParent();
 
-  // The code guarded by __nvvm_reflect may be invalid for the target machine.
-  // We need to do some basic dead code elimination to trim invalid code before
-  // it reaches the backend at all optimization levels.
-  SmallVector<BranchInst *> Simplified;
-  for (ICmpInst *Cmp : ToSimplify) {
-    Constant *LHS = dyn_cast<Constant>(Cmp->getOperand(0));
-    Constant *RHS = dyn_cast<Constant>(Cmp->getOperand(1));
-
-    if (!LHS || !RHS)
-      continue;
-
-    // If the comparison is a compile time constant we simply propagate it.
-    Constant *C = ConstantFoldCompareInstOperands(
-        Cmp->getPredicate(), LHS, RHS, Cmp->getModule()->getDataLayout());
-
-    if (!C)
-      continue;
-
-    for (User *U : Cmp->users())
-      if (BranchInst *I = dyn_cast<BranchInst>(U))
-        Simplified.push_back(I);
-
-    Cmp->replaceAllUsesWith(C);
-    Cmp->eraseFromParent();
-  }
-
-  // Each instruction here is a conditional branch off of a constant true or
-  // false value. Simply replace it with an unconditional branch to the
-  // appropriate basic block and delete the rest if it is trivially dead.
-  DenseSet<Instruction *> Removed;
-  for (BranchInst *Branch : Simplified) {
-    if (Removed.contains(Branch))
-      continue;
-
-    ConstantInt *C = dyn_cast<ConstantInt>(Branch->getCondition());
-    if (!C || (!C->isOne() && !C->isZero()))
-      continue;
-
-    BasicBlock *TrueBB =
-        C->isOne() ? Branch->getSuccessor(0) : Branch->getSuccessor(1);
-    BasicBlock *FalseBB =
-        C->isOne() ? Branch->getSuccessor(1) : Branch->getSuccessor(0);
-
-    ReplaceInstWithInst(Branch, BranchInst::Create(TrueBB));
-    if (FalseBB->use_empty() && FalseBB->hasNPredecessors(0) &&
-        FalseBB->getFirstNonPHIOrDbg()) {
-      Removed.insert(FalseBB->getFirstNonPHIOrDbg());
-      changeToUnreachable(FalseBB->getFirstNonPHIOrDbg());
-    }
-  }
-
   return ToRemove.size() > 0;
 }
 

diff  --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
deleted file mode 100644
index c9586d5688f809..00000000000000
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
+++ /dev/null
@@ -1,141 +0,0 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 -O0 | FileCheck %s --check-prefix=SM_52
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx64 -O0 | FileCheck %s --check-prefix=SM_70
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx72 -O0 | FileCheck %s --check-prefix=SM_90
-
- at .str = private unnamed_addr constant [12 x i8] c"__CUDA_ARCH\00"
-
-declare i32 @__nvvm_reflect(ptr)
-
-;      SM_52: .visible .func  (.param .b32 func_retval0) foo()
-;      SM_52: mov.b32         %[[REG:.+]], 3;
-; SM_52-NEXT: st.param.b32    [func_retval0+0], %[[REG:.+]];
-; SM_52-NEXT: ret;
-;
-;      SM_70: .visible .func  (.param .b32 func_retval0) foo()
-;      SM_70: mov.b32         %[[REG:.+]], 2;
-; SM_70-NEXT: st.param.b32    [func_retval0+0], %[[REG:.+]];
-; SM_70-NEXT: ret;
-;
-;      SM_90: .visible .func  (.param .b32 func_retval0) foo()
-;      SM_90: mov.b32         %[[REG:.+]], 1;
-; SM_90-NEXT: st.param.b32    [func_retval0+0], %[[REG:.+]];
-; SM_90-NEXT: ret;
-define i32 @foo() {
-entry:
-  %call = call i32 @__nvvm_reflect(ptr @.str)
-  %cmp = icmp uge i32 %call, 900
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  br label %return
-
-if.else:
-  %call1 = call i32 @__nvvm_reflect(ptr @.str)
-  %cmp2 = icmp uge i32 %call1, 700
-  br i1 %cmp2, label %if.then3, label %if.else4
-
-if.then3:
-  br label %return
-
-if.else4:
-  %call5 = call i32 @__nvvm_reflect(ptr @.str)
-  %cmp6 = icmp uge i32 %call5, 520
-  br i1 %cmp6, label %if.then7, label %if.else8
-
-if.then7:
-  br label %return
-
-if.else8:
-  br label %return
-
-return:
-  %retval.0 = phi i32 [ 1, %if.then ], [ 2, %if.then3 ], [ 3, %if.then7 ], [ 4, %if.else8 ]
-  ret i32 %retval.0
-}
-
-;      SM_52: .visible .func  (.param .b32 func_retval0) bar()
-;      SM_52: mov.b32         %[[REG:.+]], 2;
-; SM_52-NEXT: st.param.b32    [func_retval0+0], %[[REG:.+]];
-; SM_52-NEXT: ret;
-;
-;      SM_70: .visible .func  (.param .b32 func_retval0) bar()
-;      SM_70: mov.b32         %[[REG:.+]], 1;
-; SM_70-NEXT: st.param.b32    [func_retval0+0], %[[REG:.+]];
-; SM_70-NEXT: ret;
-;
-;      SM_90: .visible .func  (.param .b32 func_retval0) bar()
-;      SM_90: mov.b32         %[[REG:.+]], 1;
-; SM_90-NEXT: st.param.b32    [func_retval0+0], %[[REG:.+]];
-; SM_90-NEXT: ret;
-define i32 @bar() {
-entry:
-  %call = call i32 @__nvvm_reflect(ptr @.str)
-  %cmp = icmp uge i32 %call, 700
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  br label %if.end
-
-if.else:
-  br label %if.end
-
-if.end:
-  %x = phi i32 [ 1, %if.then ], [ 2, %if.else ]
-  ret i32 %x
-}
-
-; SM_52-NOT: valid;
-; SM_70: valid;
-; SM_90: valid;
-define void @baz() {
-entry:
-  %call = call i32 @__nvvm_reflect(ptr @.str)
-  %cmp = icmp uge i32 %call, 700
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  call void asm sideeffect "valid;\0A", ""()
-  br label %if.end
-
-if.end:
-  ret void
-}
-
-; SM_52: .visible .func  (.param .b32 func_retval0) qux()
-; SM_52: mov.u32         %[[REG1:.+]], %[[REG2:.+]];
-; SM_52: st.param.b32    [func_retval0+0], %[[REG1:.+]];
-; SM_52: ret;
-; SM_70: .visible .func  (.param .b32 func_retval0) qux()
-; SM_70: mov.u32         %[[REG1:.+]], %[[REG2:.+]];
-; SM_70: st.param.b32    [func_retval0+0], %[[REG1:.+]];
-; SM_70: ret;
-; SM_90: .visible .func  (.param .b32 func_retval0) qux()
-; SM_90: st.param.b32    [func_retval0+0], %[[REG1:.+]];
-; SM_90: ret;
-define i32 @qux() {
-entry:
-  %call = call i32 @__nvvm_reflect(ptr noundef @.str)
-  %cmp = icmp uge i32 %call, 700
-  %conv = zext i1 %cmp to i32
-  switch i32 %conv, label %sw.default [
-    i32 900, label %sw.bb
-    i32 700, label %sw.bb1
-    i32 520, label %sw.bb2
-  ]
-
-sw.bb:
-  br label %return
-
-sw.bb1:
-  br label %return
-
-sw.bb2:
-  br label %return
-
-sw.default:
-  br label %return
-
-return:
-  %retval = phi i32 [ 4, %sw.default ], [ 3, %sw.bb2 ], [ 2, %sw.bb1 ], [ 1, %sw.bb ]
-  ret i32 %retval
-}

diff  --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
index ac5875c6ab1043..e8c554c9ed5289 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
@@ -18,3 +18,4 @@ define i32 @foo(float %a, float %b) {
 ; SM35: ret i32 350  
   ret i32 %reflect
 }
+


        


More information about the llvm-commits mailing list