[llvm] [AMDGPU] Don't create mulhi_24 in CGP (PR #72983)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 21 04:44:59 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
Instead, create a mul24 with a 64-bit result and let ISel take care of it.
See discussion in #<!-- -->72393 for context.
---
Patch is 40.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72983.diff
8 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+13-47)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td (+7-3)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+11)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll (+61-135)
- (modified) llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll (+9-8)
- (modified) llvm/test/CodeGen/AMDGPU/mul_int24.ll (+15-42)
- (modified) llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll (+3-6)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f10bc7c75eb199b..57bb8abee8f9413 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1993,12 +1993,14 @@ def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">,
[IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
+// mul24 intrinsics can return i32 or i64.
+// When returning i64, they're lowered to a mul24/mulhi24 pair.
+def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
+def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 4cce34bdeabcf44..4caa9cd9225b690 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -624,34 +624,6 @@ static Value *insertValues(IRBuilder<> &Builder,
return NewVal;
}
-// Returns 24-bit or 48-bit (as per `NumBits` and `Size`) mul of `LHS` and
-// `RHS`. `NumBits` is the number of KnownBits of the result and `Size` is the
-// width of the original destination.
-static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
- unsigned Size, unsigned NumBits, bool IsSigned) {
- if (Size <= 32 || NumBits <= 32) {
- Intrinsic::ID ID =
- IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
- return Builder.CreateIntrinsic(ID, {}, {LHS, RHS});
- }
-
- assert(NumBits <= 48);
-
- Intrinsic::ID LoID =
- IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
- Intrinsic::ID HiID =
- IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;
-
- Value *Lo = Builder.CreateIntrinsic(LoID, {}, {LHS, RHS});
- Value *Hi = Builder.CreateIntrinsic(HiID, {}, {LHS, RHS});
-
- IntegerType *I64Ty = Builder.getInt64Ty();
- Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty);
- Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty);
-
- return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
-}
-
bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)
return false;
@@ -691,26 +663,20 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
extractValues(Builder, RHSVals, RHS);
IntegerType *I32Ty = Builder.getInt32Ty();
- for (int I = 0, E = LHSVals.size(); I != E; ++I) {
- Value *LHS, *RHS;
- if (IsSigned) {
- LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
- RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
- } else {
- LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
- RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
- }
+ IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
+ Type *DstTy = LHSVals[0]->getType();
- Value *Result =
- getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned);
-
- if (IsSigned) {
- ResultVals.push_back(
- Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType()));
- } else {
- ResultVals.push_back(
- Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType()));
- }
+ for (int I = 0, E = LHSVals.size(); I != E; ++I) {
+ Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
+ : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+ Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
+ : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
+ Intrinsic::ID ID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ Value *Result = Builder.CreateIntrinsic(ID, {IntrinTy}, {LHS, RHS});
+ Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
+ : Builder.CreateZExtOrTrunc(Result, DstTy);
+ ResultVals.push_back(Result);
}
Value *NewVal = insertValues(Builder, Ty, ResultVals);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 324285e580bbaff..fd38739876c4d57 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -281,11 +281,15 @@ def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
// Signed and unsigned 24-bit multiply. The highest 8-bits are ignore
-// when performing the multiply. The result is a 32-bit value.
-def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
+// when performing the multiply. The result is a 32 or 64 bit value.
+def AMDGPUMul24Op : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
+]>;
+
+def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", AMDGPUMul24Op,
[SDNPCommutative, SDNPAssociative]
>;
-def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
+def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", AMDGPUMul24Op,
[SDNPCommutative, SDNPAssociative]
>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index b97d979b0336b6a..5911a4f004819c8 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -862,6 +862,17 @@ def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;
+// mul24 w/ 64 bit output.
+class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat<
+ (i64 (Op i32:$src0, i32:$src1)),
+ (REG_SEQUENCE VReg_64,
+ (InstLo $src0, $src1), sub0,
+ (InstHi $src0, $src1), sub1)
+>;
+
+def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e32, V_MUL_HI_I32_I24_e32>;
+def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e32, V_MUL_HI_U32_U24_e32>;
+
//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
index 62bc145f1387efe..d938c16bf6134c7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -7,7 +7,7 @@ define i16 @mul_i16(i16 %lhs, i16 %rhs) {
; SI-LABEL: @mul_i16(
; SI-NEXT: [[TMP1:%.*]] = zext i16 [[LHS:%.*]] to i32
; SI-NEXT: [[TMP2:%.*]] = zext i16 [[RHS:%.*]] to i32
-; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i16
; SI-NEXT: ret i16 [[MUL]]
;
@@ -29,7 +29,7 @@ define i32 @smul24_i32(i32 %lhs, i32 %rhs) {
; SI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
; SI-NEXT: [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
; SI-NEXT: [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 8
-; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
+; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24.i32(i32 [[LHS24]], i32 [[RHS24]])
; SI-NEXT: ret i32 [[MUL]]
;
; VI-LABEL: @smul24_i32(
@@ -37,7 +37,7 @@ define i32 @smul24_i32(i32 %lhs, i32 %rhs) {
; VI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
; VI-NEXT: [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
; VI-NEXT: [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 8
-; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
+; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24.i32(i32 [[LHS24]], i32 [[RHS24]])
; VI-NEXT: ret i32 [[MUL]]
;
; DISABLED-LABEL: @smul24_i32(
@@ -61,7 +61,7 @@ define <2 x i8> @mul_v1i16(<1 x i16> %arg) {
; SI-NEXT: BB:
; SI-NEXT: [[TMP0:%.*]] = extractelement <1 x i16> [[ARG:%.*]], i64 0
; SI-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
-; SI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 42)
+; SI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[TMP1]], i32 42)
; SI-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; SI-NEXT: [[MUL:%.*]] = insertelement <1 x i16> poison, i16 [[TMP3]], i64 0
; SI-NEXT: [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8>
@@ -90,7 +90,7 @@ define <1 x i8> @mul_v1i8(<1 x i8> %arg) {
; SI-NEXT: BB:
; SI-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ARG:%.*]], i64 0
; SI-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
-; SI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 42)
+; SI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[TMP1]], i32 42)
; SI-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; SI-NEXT: [[MUL:%.*]] = insertelement <1 x i8> poison, i8 [[TMP3]], i64 0
; SI-NEXT: ret <1 x i8> [[MUL]]
@@ -120,8 +120,8 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
-; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
-; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
+; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24.i32(i32 [[TMP1]], i32 [[TMP3]])
+; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24.i32(i32 [[TMP2]], i32 [[TMP4]])
; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; SI-NEXT: ret <2 x i32> [[MUL]]
@@ -135,8 +135,8 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
-; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
-; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
+; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24.i32(i32 [[TMP1]], i32 [[TMP3]])
+; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24.i32(i32 [[TMP2]], i32 [[TMP4]])
; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; VI-NEXT: ret <2 x i32> [[MUL]]
@@ -161,13 +161,13 @@ define i32 @umul24_i32(i32 %lhs, i32 %rhs) {
; SI-LABEL: @umul24_i32(
; SI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
; SI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
-; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
+; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[LHS24]], i32 [[RHS24]])
; SI-NEXT: ret i32 [[MUL]]
;
; VI-LABEL: @umul24_i32(
; VI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
; VI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
-; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
+; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[LHS24]], i32 [[RHS24]])
; VI-NEXT: ret i32 [[MUL]]
;
; DISABLED-LABEL: @umul24_i32(
@@ -190,8 +190,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
-; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
-; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
+; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[TMP1]], i32 [[TMP3]])
+; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[TMP2]], i32 [[TMP4]])
; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; SI-NEXT: ret <2 x i32> [[MUL]]
@@ -203,8 +203,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
-; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
-; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
+; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[TMP1]], i32 [[TMP3]])
+; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24.i32(i32 [[TMP2]], i32 [[TMP4]])
; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; VI-NEXT: ret <2 x i32> [[MUL]]
@@ -229,12 +229,7 @@ define i64 @smul24_i64(i64 %lhs, i64 %rhs) {
; SI-NEXT: [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 40
; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
-; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
-; SI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; SI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.i24.i64(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT: ret i64 [[MUL]]
;
; VI-LABEL: @smul24_i64(
@@ -244,12 +239,7 @@ define i64 @smul24_i64(i64 %lhs, i64 %rhs) {
; VI-NEXT: [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 40
; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
-; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
-; VI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; VI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.i24.i64(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT: ret i64 [[MUL]]
;
; DISABLED-LABEL: @smul24_i64(
@@ -276,8 +266,7 @@ define i64 @smul24_i64_2(i64 %lhs, i64 %rhs) {
; SI-NEXT: [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 49
; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[MUL:%.*]] = sext i32 [[TMP3]] to i64
+; SI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.i24.i64(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT: ret i64 [[MUL]]
;
; VI-LABEL: @smul24_i64_2(
@@ -287,8 +276,7 @@ define i64 @smul24_i64_2(i64 %lhs, i64 %rhs) {
; VI-NEXT: [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 49
; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[MUL:%.*]] = sext i32 [[TMP3]] to i64
+; VI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.i24.i64(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT: ret i64 [[MUL]]
;
; DISABLED-LABEL: @smul24_i64_2(
@@ -315,12 +303,7 @@ define i64 @smul24_i64_3(i64 %lhs, i64 %rhs) {
; SI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
-; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
-; SI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; SI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.i24.i64(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT: ret i64 [[MUL]]
;
; VI-LABEL: @smul24_i64_3(
@@ -330,12 +313,7 @@ define i64 @smul24_i64_3(i64 %lhs, i64 %rhs) {
; VI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
-; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
-; VI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; VI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.i24.i64(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT: ret i64 [[MUL]]
;
; DISABLED-LABEL: @smul24_i64_3(
@@ -393,12 +371,7 @@ define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
; SI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
-; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
-; SI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; SI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.u24.i64(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT: ret i64 [[MUL]]
;
; VI-LABEL: @umul24_i64(
@@ -406,12 +379,7 @@ define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
; VI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
-; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
-; VI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; VI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.u24.i64(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT: ret i64 [[MUL]]
;
; DISABLED-LABEL: @umul24_i64(
@@ -432,8 +400,7 @@ define i64 @umul24_i64_2(i64 %lhs, i64 %rhs) {
; SI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 65535
; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT: [[MUL:%.*]] = zext i32 [[TMP3]] to i64
+; SI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.u24.i64(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT: ret i64 [[MUL]]
;
; VI-LABEL: @umul24_i64_2(
@@ -441,8 +408,7 @@ define i64 @umul24_i64_2(i64 %lhs, i64 %rhs) {
; VI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 65535
; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT: [[MUL:%.*]] = zext i32 [[TMP3]] to i64
+; VI-NEXT: [[MUL:%.*]] = call i64 @llvm.amdgcn.mul.u24.i64(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT: ret i64 [[MUL]]
;
; DISABLED-LABEL: @umul24_i64_2(
@@ -465,7 +431,7 @@ define i31 @smul24_i31(i31 %lhs, i31 %rhs) {
; SI-NEXT: [[RHS24:%.*]] = ashr i31 [[SHL_RHS]], 7
; SI-NEXT: [[TMP1:%.*]] = s...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/72983
More information about the llvm-commits
mailing list