[llvm] 2da6ef3 - [AMDGPU] Add 24-bit mulhi intrinsics in INTRINSIC_WO_CHAIN combine.
Abinav Puthan Purayil via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 28 04:28:46 PDT 2021
Author: Abinav Puthan Purayil
Date: 2021-10-28T16:57:48+05:30
New Revision: 2da6ef3664333cc36c449b271c50a72dd7f61940
URL: https://github.com/llvm/llvm-project/commit/2da6ef3664333cc36c449b271c50a72dd7f61940
DIFF: https://github.com/llvm/llvm-project/commit/2da6ef3664333cc36c449b271c50a72dd7f61940.diff
LOG: [AMDGPU] Add 24-bit mulhi intrinsics in INTRINSIC_WO_CHAIN combine.
The operands of the mul24 intrinsics are already simplified by
AMDGPUTargetLowering::performIntrinsicWOChainCombine(). This change adds
the 24-bit mulhi intrinsics (amdgcn_mulhi_i24 and amdgcn_mulhi_u24) to
that combine, since their operands can be simplified in the same way as
those of the mul24 intrinsics.
Differential Revision: https://reviews.llvm.org/D112702
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 49abb1e00890..1632362109fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2897,8 +2897,22 @@ static SDValue simplifyMul24(SDNode *Node24,
unsigned NewOpcode = Node24->getOpcode();
if (IsIntrin) {
unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
- NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ?
- AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+ switch (IID) {
+ case Intrinsic::amdgcn_mul_i24:
+ NewOpcode = AMDGPUISD::MUL_I24;
+ break;
+ case Intrinsic::amdgcn_mul_u24:
+ NewOpcode = AMDGPUISD::MUL_U24;
+ break;
+ case Intrinsic::amdgcn_mulhi_i24:
+ NewOpcode = AMDGPUISD::MULHI_I24;
+ break;
+ case Intrinsic::amdgcn_mulhi_u24:
+ NewOpcode = AMDGPUISD::MULHI_U24;
+ break;
+ default:
+ llvm_unreachable("Expected 24-bit mul intrinsic");
+ }
}
APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
@@ -3107,6 +3121,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
+ case Intrinsic::amdgcn_mulhi_i24:
+ case Intrinsic::amdgcn_mulhi_u24:
return simplifyMul24(N, DCI);
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_rsq:
diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
index 7c15e731a555..eaa45b929b2b 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
@@ -575,11 +575,9 @@ define i64 @test_umul48_i64(i64 %lhs, i64 %rhs) {
; GCN-LABEL: test_umul48_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s4, 0xffffff
-; GCN-NEXT: v_and_b32_e32 v1, s4, v0
-; GCN-NEXT: v_and_b32_e32 v3, s4, v2
-; GCN-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GCN-NEXT: v_mul_hi_u32_u24_e32 v1, v1, v3
+; GCN-NEXT: v_mul_u32_u24_e32 v3, v0, v2
+; GCN-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v2
+; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
%lhs24 = and i64 %lhs, 16777215
%rhs24 = and i64 %rhs, 16777215
@@ -588,49 +586,16 @@ define i64 @test_umul48_i64(i64 %lhs, i64 %rhs) {
}
define <2 x i64> @test_umul48_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; SI-LABEL: test_umul48_v2i64:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s4, 0xffffff
-; SI-NEXT: v_mul_u32_u24_e32 v5, v0, v4
-; SI-NEXT: v_mul_u32_u24_e32 v7, v2, v6
-; SI-NEXT: v_and_b32_e32 v2, s4, v2
-; SI-NEXT: v_and_b32_e32 v0, s4, v0
-; SI-NEXT: v_and_b32_e32 v3, s4, v6
-; SI-NEXT: v_and_b32_e32 v1, s4, v4
-; SI-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
-; SI-NEXT: v_mul_hi_u32_u24_e32 v3, v2, v3
-; SI-NEXT: v_mov_b32_e32 v0, v5
-; SI-NEXT: v_mov_b32_e32 v2, v7
-; SI-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: test_umul48_v2i64:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: s_mov_b32 s4, 0xffffff
-; VI-NEXT: v_and_b32_e32 v3, s4, v2
-; VI-NEXT: v_and_b32_e32 v1, s4, v0
-; VI-NEXT: v_and_b32_e32 v5, s4, v6
-; VI-NEXT: v_and_b32_e32 v7, s4, v4
-; VI-NEXT: v_mul_u32_u24_e32 v0, v0, v4
-; VI-NEXT: v_mul_hi_u32_u24_e32 v1, v1, v7
-; VI-NEXT: v_mul_u32_u24_e32 v2, v2, v6
-; VI-NEXT: v_mul_hi_u32_u24_e32 v3, v3, v5
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_umul48_v2i64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0xffffff
-; GFX9-NEXT: v_and_b32_e32 v3, s4, v2
-; GFX9-NEXT: v_and_b32_e32 v1, s4, v0
-; GFX9-NEXT: v_and_b32_e32 v5, s4, v6
-; GFX9-NEXT: v_and_b32_e32 v7, s4, v4
-; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v4
-; GFX9-NEXT: v_mul_hi_u32_u24_e32 v1, v1, v7
-; GFX9-NEXT: v_mul_u32_u24_e32 v2, v2, v6
-; GFX9-NEXT: v_mul_hi_u32_u24_e32 v3, v3, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: test_umul48_v2i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_u32_u24_e32 v5, v0, v4
+; GCN-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v4
+; GCN-NEXT: v_mul_u32_u24_e32 v4, v2, v6
+; GCN-NEXT: v_mul_hi_u32_u24_e32 v3, v2, v6
+; GCN-NEXT: v_mov_b32_e32 v0, v5
+; GCN-NEXT: v_mov_b32_e32 v2, v4
+; GCN-NEXT: s_setpc_b64 s[30:31]
%lhs24 = and <2 x i64> %lhs, <i64 16777215, i64 16777215>
%rhs24 = and <2 x i64> %rhs, <i64 16777215, i64 16777215>
%mul = mul <2 x i64> %lhs24, %rhs24
More information about the llvm-commits mailing list