[llvm] AMDGPU: Add range attribute to mbcnt intrinsic callsites (PR #189191)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 28 13:15:57 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
It seems the known-bits handling added in 686987a540bc176bceaad43ffe530cb3e88796d5
is insufficient to perform many range-based optimizations. For some reason
computeConstantRange doesn't fall back on KnownBits, and has a separate,
less-used form which tries to use computeKnownBits.
---
Patch is 27.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/189191.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+22-2)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll (+1-1)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll (+22-22)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll (+236-15)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4f866087a1309..1ef6bada6a351 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1422,11 +1422,31 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
- case Intrinsic::amdgcn_mbcnt_hi: {
+ case Intrinsic::amdgcn_mbcnt_hi:
// exec_hi is all 0, so this is just a copy.
if (ST->isWave32())
return IC.replaceInstUsesWith(II, II.getArgOperand(1));
- break;
+ [[fallthrough]];
+ case Intrinsic::amdgcn_mbcnt_lo: {
+ ConstantRange AccRange = computeConstantRange(II.getArgOperand(1),
+ /*ForSigned=*/false);
+ if (AccRange.isFullSet())
+ return nullptr;
+
+ // TODO: Can raise upper bound by inspecting first argument.
+ ConstantRange MbcntRange(APInt(32, 0), APInt(32, 32 + 1));
+ ConstantRange ComputedRange = AccRange.add(MbcntRange);
+ if (ComputedRange.isFullSet())
+ return nullptr;
+
+ if (std::optional<ConstantRange> ExistingRange = II.getRange()) {
+ ComputedRange = ComputedRange.intersectWith(*ExistingRange);
+ if (ComputedRange == *ExistingRange)
+ return nullptr;
+ }
+
+ II.addRangeRetAttr(ComputedRange);
+ return nullptr;
}
case Intrinsic::amdgcn_ballot: {
Value *Arg = II.getArgOperand(0);
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll b/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll
index c87df041f8a26..1350f68ede271 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll
@@ -6,7 +6,7 @@
define amdgpu_ps <2 x float> @turn_add_into_gep(ptr addrspace(1) inreg %sbase) {
; CHECK-LABEL: define amdgpu_ps <2 x float> @turn_add_into_gep(
; CHECK-SAME: ptr addrspace(1) inreg [[SBASE:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NEXT: [[V:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[V]], 1
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[MUL]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(1) [[SBASE]], i64 [[TMP1]]
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll
index 8c2e6fe19bd20..87fc259bdebff 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll
@@ -8,21 +8,21 @@
define i32 @test_wave_shuffle_self_select(i32 %val) {
; CHECK-W32-LABEL: define i32 @test_wave_shuffle_self_select(
; CHECK-W32-SAME: i32 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-W32-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W32-NEXT: [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-W32-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[TID]])
; CHECK-W32-NEXT: ret i32 [[RES]]
;
; CHECK-W64-LABEL: define i32 @test_wave_shuffle_self_select(
; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-W64-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-W64-NEXT: [[TID1:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TID]])
+; CHECK-W64-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT: [[TID1:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-W64-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[TID1]])
; CHECK-W64-NEXT: ret i32 [[RES]]
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_self_select(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[TID]])
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
;
@@ -48,8 +48,8 @@ define i32 @test_wave_shuffle_dpp_row_share_0(i32 %val) {
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_0(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 112
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[MASKED]])
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
@@ -75,8 +75,8 @@ define i32 @test_wave_shuffle_dpp_row_share_0_no_or(i32 %val) {
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_0_no_or(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 112
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[MASKED]])
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
@@ -101,8 +101,8 @@ define i32 @test_wave_shuffle_dpp_row_share_7(i32 %val) {
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 48
; CHECK-NO-WAVE-SIZE-NEXT: [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -119,23 +119,23 @@ define i32 @test_wave_shuffle_dpp_row_share_7(i32 %val) {
define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(i32 %val) {
; CHECK-W32-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(
; CHECK-W32-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W32-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W32-NEXT: [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-W32-NEXT: [[SHARE_7:%.*]] = or i32 [[TID]], 7
; CHECK-W32-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
; CHECK-W32-NEXT: ret i32 [[RES]]
;
; CHECK-W64-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(
; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W64-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-W64-NEXT: [[TID1:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TID]])
+; CHECK-W64-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT: [[TID1:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-W64-NEXT: [[SHARE_7:%.*]] = or i32 [[TID1]], 7
; CHECK-W64-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
; CHECK-W64-NEXT: ret i32 [[RES]]
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[SHARE_7:%.*]] = or i32 [[TID]], 7
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
@@ -158,7 +158,7 @@ define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(i32 %val) {
;
; CHECK-W64-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(
; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W64-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT: [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-W64-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 48
; CHECK-W64-NEXT: [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
; CHECK-W64-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -166,7 +166,7 @@ define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(i32 %val) {
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 48
; CHECK-NO-WAVE-SIZE-NEXT: [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -190,8 +190,8 @@ define i32 @test_wave_shuffle_dpp_row_share_w32_mask(i32 %val) {
;
; CHECK-W64-LABEL: define i32 @test_wave_shuffle_dpp_row_share_w32_mask(
; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W64-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-W64-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-W64-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT: [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-W64-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 16
; CHECK-W64-NEXT: [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
; CHECK-W64-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -199,8 +199,8 @@ define i32 @test_wave_shuffle_dpp_row_share_w32_mask(i32 %val) {
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_w32_mask(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 16
; CHECK-NO-WAVE-SIZE-NEXT: [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll b/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
index dfeaa872b846d..e4de4b947a1b6 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
@@ -72,19 +72,19 @@ define i32 @mbcnt_lo_hi(i32 %x, i32 %y, i32 %z) {
define i32 @ockl_lane_u32() {
; DEFAULT-LABEL: define i32 @ockl_lane_u32() {
-; DEFAULT-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; DEFAULT-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; DEFAULT-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; DEFAULT-NEXT: [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; DEFAULT-NEXT: ret i32 [[HI]]
;
; WAVE32-LABEL: define i32 @ockl_lane_u32
; WAVE32-SAME: () #[[ATTR1]] {
-; WAVE32-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE32-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE32-NEXT: ret i32 [[LO]]
;
; WAVE64-LABEL: define i32 @ockl_lane_u32
; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; WAVE64-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; WAVE64-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE64-NEXT: [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; WAVE64-NEXT: ret i32 [[HI]]
;
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@@ -94,17 +94,17 @@ define i32 @ockl_lane_u32() {
define i32 @mbcnt_lo_and63() {
; DEFAULT-LABEL: define i32 @mbcnt_lo_and63() {
-; DEFAULT-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; DEFAULT-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; DEFAULT-NEXT: ret i32 [[LO]]
;
; WAVE32-LABEL: define i32 @mbcnt_lo_and63
; WAVE32-SAME: () #[[ATTR1]] {
-; WAVE32-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE32-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE32-NEXT: ret i32 [[LO]]
;
; WAVE64-LABEL: define i32 @mbcnt_lo_and63
; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE64-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE64-NEXT: ret i32 [[LO]]
;
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@@ -114,7 +114,7 @@ define i32 @mbcnt_lo_and63() {
define i32 @mbcnt_hi_and31() {
; DEFAULT-LABEL: define i32 @mbcnt_hi_and31() {
-; DEFAULT-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
+; DEFAULT-NEXT: [[HI:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
; DEFAULT-NEXT: ret i32 [[HI]]
;
; WAVE32-LABEL: define i32 @mbcnt_hi_and31
@@ -123,7 +123,7 @@ define i32 @mbcnt_hi_and31() {
;
; WAVE64-LABEL: define i32 @mbcnt_hi_and31
; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
+; WAVE64-NEXT: [[HI:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
; WAVE64-NEXT: ret i32 [[HI]]
;
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
@@ -133,19 +133,19 @@ define i32 @mbcnt_hi_and31() {
define i32 @ockl_lane_u32_and127() {
; DEFAULT-LABEL: define i32 @ockl_lane_u32_and127() {
-; DEFAULT-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; DEFAULT-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; DEFAULT-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; DEFAULT-NEXT: [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; DEFAULT-NEXT: ret i32 [[HI]]
;
; WAVE32-LABEL: define i32 @ockl_lane_u32_and127
; WAVE32-SAME: () #[[ATTR1]] {
-; WAVE32-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE32-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE32-NEXT: ret i32 [[LO]]
;
; WAVE64-LABEL: define i32 @ockl_lane_u32_and127
; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; WAVE64-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; WAVE64-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE64-NEXT: [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; WAVE64-NEXT: ret i32 [[HI]]
;
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@@ -153,5 +153,226 @@ define i32 @ockl_lane_u32_and127() {
%and = and i32 %hi, 127
ret i32 %and
}
+
+define i32 @known_range_mbcnt_lo(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @known_range_mbcnt_lo
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 [[UNKNOWN]], i32 0)
+; DEFAULT-NEXT: ret i32 [[LO]]
+;
+; WAVE32-LABEL: define i32 @known_range_mbcnt_lo
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 [[UNKNOWN]], i32 0)
+; WAVE32-NEXT: ret i32 [[LO]]
+;
+; WAVE64-LABEL: define i32 @known_range_mbcnt_lo
+; WAVE64-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE64-NEXT: [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 [[UNKNOWN]], i32 0)
+; WAVE64-NEXT: ret i32 [[LO]]
+;
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %unknown, i32 0)
+ %min = call i32 @llvm.umin.i32(i32 %lo, i32 32)
+ ret i32 %min
+}
+
+define i32 @mbcnt_lo_add_unknown(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @mbcnt_lo_add_unknown
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 [[UNKNOWN]])
+; DEFAULT-NEXT: ret i32 [[LO]]
+;
+; WAVE32-LABEL: define i32 @mbcnt_lo_add_unknown
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 [[UNKNOWN]])
+; WAVE32-NEXT: ret i32 [[LO]]
+;
+; WAVE64-LABEL: define i32 @mbcnt_lo_add_unknown
+; WAVE64-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE64-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 [[UNKNOWN]])
+; WAVE64-NEXT: ret i32 [[LO]]
+;
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 %unknown)
+ ret i32 %lo
+}
+
+define i32 @mbcnt_hi_add_unknown(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @mbcnt_hi_add_unknown
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[UNKNOWN]])
+; DEFAULT-NEXT: ret i32 [[HI]]
+;
+; WAVE32-LABEL: define i32 @mbcnt_hi_add_unknown
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT: ret i32 [[UNKNOWN]]
+;
+; WAVE64-LABEL: define i32 @mbcnt_hi_add_unknown
+; WAVE64-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE64-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[UNKNOWN]])
+; WAVE64-NEXT: ret i32 [[HI]]
+;
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %unknown)
+ ret i32 %hi
+}
+
+define i32 @known_range_mbcnt_hi(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @known_range_mbcnt_hi
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT: [[HI:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.hi(i32 [[UNKNOWN]], i32 0)
+; DEFAULT-NEXT: ret i32 [[HI]]
+;
+; WAVE32-LABEL: define i32 @known_range_mbcnt_hi
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT: ret i32 0
+;
+; WAVE64-LABEL: define i32 @known_range_mbcnt_hi
+; WAVE64-SAM...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/189191
More information about the llvm-commits mailing list