[llvm] abff766 - AMDGPU: Implement known bits functions for min3/max3/med3
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 10 07:59:07 PDT 2023
Author: Matt Arsenault
Date: 2023-06-10T10:58:44-04:00
New Revision: abff7668aba505a97895eab2476afa4fb1611720
URL: https://github.com/llvm/llvm-project/commit/abff7668aba505a97895eab2476afa4fb1611720
DIFF: https://github.com/llvm/llvm-project/commit/abff7668aba505a97895eab2476afa4fb1611720.diff
LOG: AMDGPU: Implement known bits functions for min3/max3/med3
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir
llvm/test/CodeGen/AMDGPU/med3-knownbits.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a00a24f55662f..f94dc4f81b286 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -18,6 +18,7 @@
#include "AMDGPUMachineFunction.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -4964,6 +4965,29 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
Known.Zero.setLowBits(Log2(Alignment));
break;
}
+ case AMDGPUISD::SMIN3:
+ case AMDGPUISD::SMAX3:
+ case AMDGPUISD::SMED3:
+ case AMDGPUISD::UMIN3:
+ case AMDGPUISD::UMAX3:
+ case AMDGPUISD::UMED3: {
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
+ if (Known2.isUnknown())
+ break;
+
+ KnownBits Known1 = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Known1.isUnknown())
+ break;
+
+ KnownBits Known0 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (Known0.isUnknown())
+ break;
+
+ // TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling.
+ Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero;
+ Known.One = Known0.One & Known1.One & Known2.One;
+ break;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IID) {
@@ -5018,6 +5042,26 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return 16;
case AMDGPUISD::FP_TO_FP16:
return 16;
+ case AMDGPUISD::SMIN3:
+ case AMDGPUISD::SMAX3:
+ case AMDGPUISD::SMED3:
+ case AMDGPUISD::UMIN3:
+ case AMDGPUISD::UMAX3:
+ case AMDGPUISD::UMED3: {
+ unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(2), Depth + 1);
+ if (Tmp2 == 1)
+ return 1; // Early out.
+
+ unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ if (Tmp1 == 1)
+ return 1; // Early out.
+
+ unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp0 == 1)
+ return 1; // Early out.
+
+ return std::min(Tmp0, std::min(Tmp1, Tmp2));
+ }
default:
return 1;
}
@@ -5041,6 +5085,20 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
return 24;
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
return 16;
+ case AMDGPU::G_AMDGPU_SMED3:
+ case AMDGPU::G_AMDGPU_UMED3: {
+ auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
+ unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ if (Tmp2 == 1)
+ return 1;
+ unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if (Tmp1 == 1)
+ return 1;
+ unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);
+ if (Tmp0 == 1)
+ return 1;
+ return std::min(Tmp0, std::min(Tmp1, Tmp2));
+ }
default:
return 1;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0b05f96e98084..6a3a6dd924529 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13088,6 +13088,30 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
Known.Zero.setHighBits(16);
break;
+ case AMDGPU::G_AMDGPU_SMED3:
+ case AMDGPU::G_AMDGPU_UMED3: {
+ auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
+
+ KnownBits Known2;
+ KB.computeKnownBitsImpl(Src2, Known2, DemandedElts, Depth + 1);
+ if (Known2.isUnknown())
+ break;
+
+ KnownBits Known1;
+ KB.computeKnownBitsImpl(Src1, Known1, DemandedElts, Depth + 1);
+ if (Known1.isUnknown())
+ break;
+
+ KnownBits Known0;
+ KB.computeKnownBitsImpl(Src0, Known0, DemandedElts, Depth + 1);
+ if (Known0.isUnknown())
+ break;
+
+ // TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling.
+ Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero;
+ Known.One = Known0.One & Known1.One & Known2.One;
+ break;
+ }
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir
index 7daf638ec1078..0c0f96a808a96 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/compute-num-sign-bits-med3.mir
@@ -17,8 +17,7 @@ body: |
; CHECK-NEXT: %val1:_(s32) = G_CONSTANT i32 -255
; CHECK-NEXT: %val2:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val0, %val1, %val2
- ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %smed3, 9
- ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ ; CHECK-NEXT: $vgpr0 = COPY %smed3(s32)
%val:_(s32) = COPY $vgpr0
%val0:_(s32) = G_SEXT_INREG %val, 8
%val1:_(s32) = G_CONSTANT i32 -255
@@ -45,8 +44,7 @@ body: |
; CHECK-NEXT: %val1:_(s32) = G_CONSTANT i32 -255
; CHECK-NEXT: %val2:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val1, %val0, %val2
- ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %smed3, 9
- ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ ; CHECK-NEXT: $vgpr0 = COPY %smed3(s32)
%val:_(s32) = COPY $vgpr0
%val0:_(s32) = G_SEXT_INREG %val, 8
%val1:_(s32) = G_CONSTANT i32 -255
@@ -73,8 +71,7 @@ body: |
; CHECK-NEXT: %val1:_(s32) = G_CONSTANT i32 -256
; CHECK-NEXT: %val2:_(s32) = G_CONSTANT i32 128
; CHECK-NEXT: %smed3:_(s32) = G_AMDGPU_SMED3 %val1, %val2, %val0
- ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %smed3, 9
- ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ ; CHECK-NEXT: $vgpr0 = COPY %smed3(s32)
%val:_(s32) = COPY $vgpr0
%val0:_(s32) = G_SEXT_INREG %val, 8
%val1:_(s32) = G_CONSTANT i32 -256
diff --git a/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll b/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll
index d6343ce7bca63..e64bc0dc374da 100644
--- a/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll
@@ -17,9 +17,8 @@ define i32 @v_known_bits_umed3(i8 %a) {
; SI-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x80
; SI-SDAG-NEXT: v_med3_u32 v0, v0, 32, v1
-; SI-SDAG-NEXT: v_add_i32_e32 v0, vcc, 0x80, v0
; SI-SDAG-NEXT: s_mov_b32 m0, -1
-; SI-SDAG-NEXT: ds_read_u8 v0, v0
+; SI-SDAG-NEXT: ds_read_u8 v0, v0 offset:128
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -29,8 +28,7 @@ define i32 @v_known_bits_umed3(i8 %a) {
; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x80
; SI-GISEL-NEXT: v_med3_u32 v0, v0, 32, v1
-; SI-GISEL-NEXT: v_add_i32_e32 v0, vcc, 0x80, v0
-; SI-GISEL-NEXT: ds_read_u8 v0, v0
+; SI-GISEL-NEXT: ds_read_u8 v0, v0 offset:128
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
%ext.a = zext i8 %a to i32
@@ -55,37 +53,23 @@ define i32 @v_known_signbits_smed3(i16 %a, i16 %b) {
; SI-SDAG-NEXT: s_movk_i32 s4, 0xffc0
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x80
; SI-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
-; SI-SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; SI-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; SI-SDAG-NEXT: v_xor_b32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cvt_f32_u32_e32 v3, v1
-; SI-SDAG-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
-; SI-SDAG-NEXT: s_movk_i32 s4, 0xffe0
-; SI-SDAG-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_i32_e32 v2, v1
; SI-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
+; SI-SDAG-NEXT: s_movk_i32 s4, 0xffe0
; SI-SDAG-NEXT: v_med3_i32 v0, v0, s4, 64
-; SI-SDAG-NEXT: v_ashrrev_i32_e32 v5, 31, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; SI-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3
-; SI-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; SI-SDAG-NEXT: v_xor_b32_e32 v0, v0, v5
-; SI-SDAG-NEXT: v_mul_lo_u32 v4, v4, v3
-; SI-SDAG-NEXT: v_mul_hi_u32 v4, v3, v4
-; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; SI-SDAG-NEXT: v_mul_hi_u32 v3, v0, v3
-; SI-SDAG-NEXT: v_mul_lo_u32 v4, v3, v1
-; SI-SDAG-NEXT: v_add_i32_e32 v6, vcc, 1, v3
-; SI-SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
-; SI-SDAG-NEXT: v_sub_i32_e32 v4, vcc, v0, v1
-; SI-SDAG-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; SI-SDAG-NEXT: v_add_i32_e32 v4, vcc, 1, v3
-; SI-SDAG-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
-; SI-SDAG-NEXT: v_xor_b32_e32 v1, v5, v2
+; SI-SDAG-NEXT: v_cvt_f32_i32_e32 v3, v0
+; SI-SDAG-NEXT: v_rcp_iflag_f32_e32 v4, v2
; SI-SDAG-NEXT: v_xor_b32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; SI-SDAG-NEXT: v_ashrrev_i32_e32 v0, 30, v0
+; SI-SDAG-NEXT: v_or_b32_e32 v0, 1, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v3, v4
+; SI-SDAG-NEXT: v_trunc_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mad_f32 v3, -v1, v2, v3
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2|
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-SDAG-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; SI-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_known_signbits_smed3:
More information about the llvm-commits mailing list