[llvm] [llvm][NVPTX] Fix RAUW bug in NVPTXProxyRegErasure (PR #105871)
Jeff Niu via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 11:54:31 PDT 2024
https://github.com/Mogball updated https://github.com/llvm/llvm-project/pull/105871
>From 2a50cb17baac0449bf6b21e981fa786d52dea390 Mon Sep 17 00:00:00 2001
From: Mogball <jeff at modular.com>
Date: Fri, 23 Aug 2024 14:28:36 -0400
Subject: [PATCH 1/2] [llvm][NVPTX] Fix RAUW bug in NVPTXProxyRegErasure
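Fix a bug introduced in #105730: handle chained ProxyReg instructions. If the
input register of a ProxyReg is itself already recorded in the RAUW batch as
replaced, map the ProxyReg's output to that register's replacement instead.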
---
llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp | 6 +++++-
llvm/test/CodeGen/NVPTX/bug105730.ll | 17 +++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/NVPTX/bug105730.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
index f3a3362addb0ea..16c2b307efabfb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -78,7 +78,11 @@ bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
assert(InOp.isReg() && "ProxyReg input should be a register.");
assert(OutOp.isReg() && "ProxyReg output should be a register.");
RemoveList.push_back(&MI);
- RAUWBatch.try_emplace(OutOp.getReg(), InOp.getReg());
+ Register replacement = InOp.getReg();
+ // Check if the replacement itself has been replaced.
+ if (auto it = RAUWBatch.find(replacement); it != RAUWBatch.end())
+ replacement = it->second;
+ RAUWBatch.try_emplace(OutOp.getReg(), replacement);
break;
}
}
diff --git a/llvm/test/CodeGen/NVPTX/bug105730.ll b/llvm/test/CodeGen/NVPTX/bug105730.ll
new file mode 100644
index 00000000000000..718e7ca6b80fd8
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/bug105730.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -verify-machineinstrs
+
+; Check that llc doesn't crash.
+
+target triple = "nvptx64-nvidia-cuda"
+
+define void @__builtin_splat_i8(i32 %0) {
+.lr.ph:
+ %1 = trunc i32 %0 to i8
+ %broadcast.splatinsert = insertelement <4 x i8> poison, i8 %1, i64 0
+ %broadcast.splat = shufflevector <4 x i8> %broadcast.splatinsert, <4 x i8> poison, <4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ store <4 x i8> %broadcast.splat, ptr addrspace(1) poison, align 1
+ br label %vector.body
+}
>From 29fd526f2ed796c9152a1266bd08f37d1f1e9810 Mon Sep 17 00:00:00 2001
From: Mogball <jeff at modular.com>
Date: Fri, 23 Aug 2024 14:54:12 -0400
Subject: [PATCH 2/2] move test to proxy-reg-erasure.ll
---
llvm/test/CodeGen/NVPTX/bug105730.ll | 17 -----------------
.../CodeGen/NVPTX/proxy-reg-erasure-mir.ll | 19 +++++++++++++++++++
2 files changed, 19 insertions(+), 17 deletions(-)
delete mode 100644 llvm/test/CodeGen/NVPTX/bug105730.ll
diff --git a/llvm/test/CodeGen/NVPTX/bug105730.ll b/llvm/test/CodeGen/NVPTX/bug105730.ll
deleted file mode 100644
index 718e7ca6b80fd8..00000000000000
--- a/llvm/test/CodeGen/NVPTX/bug105730.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs
-
-; Check that llc doesn't crash.
-
-target triple = "nvptx64-nvidia-cuda"
-
-define void @__builtin_splat_i8(i32 %0) {
-.lr.ph:
- %1 = trunc i32 %0 to i8
- %broadcast.splatinsert = insertelement <4 x i8> poison, i8 %1, i64 0
- %broadcast.splat = shufflevector <4 x i8> %broadcast.splatinsert, <4 x i8> poison, <4 x i32> zeroinitializer
- br label %vector.body
-
-vector.body:
- store <4 x i8> %broadcast.splat, ptr addrspace(1) poison, align 1
- br label %vector.body
-}
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll
index 6bfbe2aea8196c..600e1de73690f7 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll
@@ -7,6 +7,9 @@
; Check ProxyRegErasure pass MIR manipulation.
declare <4 x i32> @callee_vec_i32()
+declare void @use_vec_i32(<4 x i32>)
+
+; MIR: check_vec_i32
define <4 x i32> @check_vec_i32() {
; MIR: body:
; MIR-DAG: Callseq_Start {{[0-9]+}}, {{[0-9]+}}
@@ -23,3 +26,19 @@ define <4 x i32> @check_vec_i32() {
%ret = call <4 x i32> @callee_vec_i32()
ret <4 x i32> %ret
}
+
+; MIR: check_chained_proxy
+define void @check_chained_proxy(i8 %0) {
+ ; MIR: body:
+ ; MIR-BEFORE: %0:int32regs = ProxyRegI32 killed %4
+ %broadcast.splatinsert = insertelement <4 x i8> poison, i8 %0, i64 0
+ %broadcast.splat = shufflevector <4 x i8> %broadcast.splatinsert, <4 x i8> poison, <4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ ; MIR-BEFORE: %5:int32regs = ProxyRegI32 %0
+ ; MIR-BEFORE: SRLi32ri %5,
+ ; MIR-AFTER: SRLi32ri %4,
+ store <4 x i8> %broadcast.splat, ptr poison, align 1
+ br label %vector.body
+}
More information about the llvm-commits mailing list