[llvm-branch-commits] [llvm] AMDGPU: Add instruction flags when lowering ctor/dtor (PR #111652)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Oct 9 02:18:29 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
These should be well behaved address computations.
---
Full diff: https://github.com/llvm/llvm-project/pull/111652.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp (+7-3)
- (modified) llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index ea11002bb6a5fa..a774ad53b5bede 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -112,9 +112,13 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
Type *Int64Ty = IntegerType::getInt64Ty(C);
auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty);
auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty);
- auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr);
- auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
- auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1));
+ auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr, "", /*HasNUW=*/true,
+ /*HasNSW=*/true);
+ auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3), "",
+ /*isExact=*/true);
+ auto *Offset =
+ IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1), "", /*HasNUW=*/true,
+ /*HasNSW=*/true);
Start = IRB.CreateInBoundsGEP(
PtrArrayTy, Begin,
ArrayRef<Value *>({ConstantInt::get(Int64Ty, 0), Offset}));
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index a87e07cb57e05e..968871af2d059a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -64,8 +64,8 @@ define void @bar() addrspace(1) {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index a423b320db559d..98497a64e3204c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -79,8 +79,8 @@ define internal void @bar() {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
index 309ecb17e79ed1..a137f31c7aeeca 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -71,8 +71,8 @@ define internal void @bar.5() {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
``````````
</details>
https://github.com/llvm/llvm-project/pull/111652
More information about the llvm-branch-commits
mailing list