[llvm] 844a1d5 - [IR] Return correct memory effects for `convergencectrl` (#129874)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 06:14:50 PST 2025
Author: Yingwei Zheng
Date: 2025-03-05T22:14:47+08:00
New Revision: 844a1d52a8f5dff032cbf58288675ad1e678d609
URL: https://github.com/llvm/llvm-project/commit/844a1d52a8f5dff032cbf58288675ad1e678d609
DIFF: https://github.com/llvm/llvm-project/commit/844a1d52a8f5dff032cbf58288675ad1e678d609.diff
LOG: [IR] Return correct memory effects for `convergencectrl` (#129874)
`convergencectrl` doesn't imply any memory access.
Closes https://github.com/llvm/llvm-project/issues/129856.
Added:
llvm/test/Transforms/DCE/op_bundles.ll
Modified:
llvm/lib/IR/Instructions.cpp
llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index b5d1bc81b9d95..d2cf0ae2c1778 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -609,15 +609,17 @@ bool CallBase::hasReadingOperandBundles() const {
// Implementation note: this is a conservative implementation of operand
// bundle semantics, where *any* non-assume operand bundle (other than
// ptrauth) forces a callsite to be at least readonly.
- return hasOperandBundlesOtherThan(
- {LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi}) &&
+ return hasOperandBundlesOtherThan({LLVMContext::OB_ptrauth,
+ LLVMContext::OB_kcfi,
+ LLVMContext::OB_convergencectrl}) &&
getIntrinsicID() != Intrinsic::assume;
}
bool CallBase::hasClobberingOperandBundles() const {
return hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
- LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi}) &&
+ LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi,
+ LLVMContext::OB_convergencectrl}) &&
getIntrinsicID() != Intrinsic::assume;
}
diff --git a/llvm/test/Transforms/DCE/op_bundles.ll b/llvm/test/Transforms/DCE/op_bundles.ll
new file mode 100644
index 0000000000000..0d3b4db8265e8
--- /dev/null
+++ b/llvm/test/Transforms/DCE/op_bundles.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce | FileCheck %s
+
+define void @dead_readfirstlane_convergencetoken(<2 x i32> %src) convergent {
+; CHECK-LABEL: define void @dead_readfirstlane_convergencetoken(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ %t = tail call token @llvm.experimental.convergence.entry()
+ %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ]
+ ret void
+}
diff --git a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
index 633cf091da508..88eff971b9576 100644
--- a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
+++ b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
@@ -7,14 +7,14 @@ define i32 @nested(i32 %src) #0 {
; CHECK-SAME: i32 [[A0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[BB15160:.*:]]
; CHECK-NEXT: [[T1:%.*]] = call token @llvm.experimental.convergence.entry()
-; CHECK-NEXT: %"vl15001llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor()
-; CHECK-NEXT: %"op68297llvm.amdgcn.readfirstlane.i32([[A0]], vl15001llvm.experimental.convergence.anchor())" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl15001llvm.experimental.convergence.anchor()") ]
-; CHECK-NEXT: ret i32 undef
+; CHECK-NEXT: %"vl77672llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: %"op68297(vl77672)" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl77672llvm.experimental.convergence.anchor()") ]
+; CHECK-NEXT: ret i32 %"op68297(vl77672)"
;
%t1 = call token @llvm.experimental.convergence.entry()
%t2 = call token @llvm.experimental.convergence.anchor()
%r2 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
- ret i32 undef
+ ret i32 %r2
}
; Function Attrs: convergent nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index 530cd1831cfaa..e9d3b5e963b35 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -349,7 +349,6 @@ define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) co
; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(
; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
-; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[SRC]], i64 0
; CHECK-NEXT: [[ELT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP1]]) [ "convergencectrl"(token [[T]]) ]
; CHECK-NEXT: ret i32 [[ELT]]
More information about the llvm-commits
mailing list