[llvm] [NVPTX] Remove incorrect elimination of CopyToReg (PR #149379)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 11:38:42 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Artem Belevich (Artem-B)
<details>
<summary>Changes</summary>
Fixes miscompilation introduced by #<!-- -->126337
https://github.com/llvm/llvm-project/pull/126337#issuecomment-3081431594
---
Full diff: https://github.com/llvm/llvm-project/pull/149379.diff
2 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+1-13)
- (added) llvm/test/CodeGen/NVPTX/pr126337.ll (+41)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d017c658c53a3..f7c913223cea7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4979,8 +4979,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
if (!NVPTX::isPackedVectorTy(ElementVT) || ElementVT == MVT::v4i8)
return SDValue();
- SmallVector<SDNode *> DeadCopyToRegs;
-
// Check whether all outputs are either used by an extractelt or are
// glue/chain nodes
if (!all_of(N->uses(), [&](SDUse &U) {
@@ -5008,12 +5006,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return !U.getUser()->use_empty();
}
- // Handle CopyToReg nodes that will become dead after our replacement
- if (U.getUser()->getOpcode() == ISD::CopyToReg) {
- DeadCopyToRegs.push_back(U.getUser());
- return true;
- }
-
// Otherwise, this use prevents us from splitting a value.
return false;
}))
@@ -5080,10 +5072,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (unsigned I : seq(NewLoad->getNumValues() - NewNumOutputs))
Results.push_back(NewLoad.getValue(NewNumOutputs + I));
- // Remove dead CopyToReg nodes by folding them into the chain they reference
- for (SDNode *CTR : DeadCopyToRegs)
- DCI.CombineTo(CTR, CTR->getOperand(0));
-
return DCI.DAG.getMergeValues(Results, DL);
}
@@ -6420,4 +6408,4 @@ void NVPTXTargetLowering::computeKnownBitsForTargetNode(
default:
break;
}
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/NVPTX/pr126337.ll b/llvm/test/CodeGen/NVPTX/pr126337.ll
new file mode 100644
index 0000000000000..32e411584b0e5
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/pr126337.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas-verify %}
+
+; This IR should compile without triggering assertions in LICM
+; when the CopyToReg from %0 in the first BB gets eliminated
+; but we still use its result in the second BB.
+; Technically the problem happens in MIR, but there are multiple
+; passes involved, so testing with the IR reproducer is more convenient.
+; https://github.com/llvm/llvm-project/pull/126337#issuecomment-3081431594
+
+target datalayout = "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define ptx_kernel void @Equal_GPU_DT_COMPLEX64_DT_BOOL_kernel(<2 x float> %0) {
+; CHECK-LABEL: Equal_GPU_DT_COMPLEX64_DT_BOOL_kernel(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %.preheader15
+; CHECK-NEXT: ld.param.b64 %rd1, [Equal_GPU_DT_COMPLEX64_DT_BOOL_kernel_param_0];
+; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {%r1, tmp}, %rd1; }
+; CHECK-NEXT: setp.eq.f32 %p1, %r1, 0f00000000;
+; CHECK-NEXT: selp.b16 %rs1, 1, 0, %p1;
+; CHECK-NEXT: $L__BB0_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: mov.b64 %rd2, 0;
+; CHECK-NEXT: st.b8 [%rd2], %rs1;
+; CHECK-NEXT: bra.uni $L__BB0_1;
+.preheader15:
+ br label %1
+
+1: ; preds = %1, %.preheader15
+ %2 = fcmp oeq <2 x float> %0, zeroinitializer
+ %3 = extractelement <2 x i1> %2, i64 0
+ store i1 %3, ptr null, align 4
+ br label %1
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/149379
More information about the llvm-commits
mailing list