[llvm] 1d1cc05 - AMDGPU: mbcnt allow for non-zero src1 for known-bits
David Stuttard via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 11 05:26:11 PDT 2022
Author: David Stuttard
Date: 2022-08-11T13:23:43+01:00
New Revision: 1d1cc05539e275ae7666fc4b44bf725ec335078a
URL: https://github.com/llvm/llvm-project/commit/1d1cc05539e275ae7666fc4b44bf725ec335078a
DIFF: https://github.com/llvm/llvm-project/commit/1d1cc05539e275ae7666fc4b44bf725ec335078a.diff
LOG: AMDGPU: mbcnt allow for non-zero src1 for known-bits
Src1 for mbcnt can be a non-zero literal or register. Take this into account
when calculating known bits.
Differential Revision: https://reviews.llvm.org/D131478
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index cbee4b6604eee..73bd9817f3903 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4602,9 +4602,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
case Intrinsic::amdgcn_mbcnt_hi: {
const GCNSubtarget &ST =
DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
- // These return at most the wavefront size - 1.
+ // These return at most the (wavefront size - 1) + src1
+ // As long as src1 is an immediate we can calc known bits
+ KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
+ unsigned Src1ValBits = Src1Known.countMaxActiveBits();
+ unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2());
+ // Cater for potential carry
+ MaxActiveBits += Src1ValBits ? 1 : 0;
unsigned Size = Op.getValueType().getSizeInBits();
- Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
+ if (MaxActiveBits < Size)
+ Known.Zero.setHighBits(Size - MaxActiveBits);
break;
}
case Intrinsic::amdgcn_workitem_id_x:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
index 04405470aff0c..88d6bea38b100 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
@@ -14,24 +14,79 @@ main_body:
ret void
}
-; GCN-LABEL: {{^}}mbcnt_lo_known_bits:
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_1:
; GCN: v_mbcnt_lo_u32_b32
-; GCN-NOT: and
-define i32 @mbcnt_lo_known_bits(i32 %x, i32 %y) #0 {
+; GCN: v_and_b32_e32
+define i32 @mbcnt_lo_known_bits_1(i32 %x, i32 %y) #0 {
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 %y)
%mask = and i32 %lo, 63
ret i32 %mask
}
-; GCN-LABEL: {{^}}mbcnt_hi_known_bits:
-; GCN: v_mbcnt_hi_u32_b32
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_2:
+; GCN: v_mbcnt_lo_u32_b32
+; GCN-NOT: and
+define i32 @mbcnt_lo_known_bits_2(i32 %x) #0 {
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 0)
+ %mask = and i32 %lo, 63
+ ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_3:
+; GCN: v_mbcnt_lo_u32_b32
; GCN-NOT: and
-define i32 @mbcnt_hi_known_bits(i32 %x, i32 %y) #0 {
+define i32 @mbcnt_lo_known_bits_3(i32 %x) #0 {
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15)
+ %mask = and i32 %lo, 127
+ ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_4:
+; GCN: v_mbcnt_lo_u32_b32
+; GCN: v_and_b32_e32
+define i32 @mbcnt_lo_known_bits_4(i32 %x) #0 {
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15)
+ %mask = and i32 %lo, 63
+ ret i32 %mask
+}
+
+
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_1:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN: v_and_b32_e32
+define i32 @mbcnt_hi_known_bits_1(i32 %x, i32 %y) #0 {
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 %y)
%mask = and i32 %hi, 63
ret i32 %mask
}
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_2:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN-NOT: and
+define i32 @mbcnt_hi_known_bits_2(i32 %x) #0 {
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 0)
+ %mask = and i32 %hi, 63
+ ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_3:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN-NOT: and
+define i32 @mbcnt_hi_known_bits_3(i32 %x) #0 {
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15)
+ %mask = and i32 %hi, 127
+ ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_4:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN: v_and_b32_e32
+define i32 @mbcnt_hi_known_bits_4(i32 %x) #0 {
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15)
+ %mask = and i32 %hi, 63
+ ret i32 %mask
+}
+
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
More information about the llvm-commits mailing list