[llvm] [llvm][NVPTX] Fix RAUW bug in NVPTXProxyRegErasure (PR #105871)

Jeff Niu via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 23 11:33:18 PDT 2024


https://github.com/Mogball created https://github.com/llvm/llvm-project/pull/105871

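Fix a bug introduced in #105730: when a ProxyReg's input register has itself already been recorded for replacement in the RAUW batch, map the ProxyReg's output to that earlier replacement, so that replacements are not chained through a register that is itself being replaced.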

>From 2a50cb17baac0449bf6b21e981fa786d52dea390 Mon Sep 17 00:00:00 2001
From: Mogball <jeff at modular.com>
Date: Fri, 23 Aug 2024 14:28:36 -0400
Subject: [PATCH] [llvm][NVPTX] Fix RAUW bug in NVPTXProxyRegErasure

Fix bug introduced in #105730
---
 llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp |  6 +++++-
 llvm/test/CodeGen/NVPTX/bug105730.ll           | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/bug105730.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
index f3a3362addb0ea..16c2b307efabfb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -78,7 +78,11 @@ bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
         assert(InOp.isReg() && "ProxyReg input should be a register.");
         assert(OutOp.isReg() && "ProxyReg output should be a register.");
         RemoveList.push_back(&MI);
-        RAUWBatch.try_emplace(OutOp.getReg(), InOp.getReg());
+        Register replacement = InOp.getReg();
+        // Check if the replacement itself has been replaced.
+        if (auto it = RAUWBatch.find(replacement); it != RAUWBatch.end())
+          replacement = it->second;
+        RAUWBatch.try_emplace(OutOp.getReg(), replacement);
         break;
       }
       }
diff --git a/llvm/test/CodeGen/NVPTX/bug105730.ll b/llvm/test/CodeGen/NVPTX/bug105730.ll
new file mode 100644
index 00000000000000..718e7ca6b80fd8
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/bug105730.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -verify-machineinstrs
+
+; Check that llc doesn't crash.
+
+target triple = "nvptx64-nvidia-cuda"
+
+define void @__builtin_splat_i8(i32 %0) {
+.lr.ph:
+  %1 = trunc i32 %0 to i8
+  %broadcast.splatinsert = insertelement <4 x i8> poison, i8 %1, i64 0
+  %broadcast.splat = shufflevector <4 x i8> %broadcast.splatinsert, <4 x i8> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:
+  store <4 x i8> %broadcast.splat, ptr addrspace(1) poison, align 1
+  br label %vector.body
+}



More information about the llvm-commits mailing list