[llvm] de30384 - [AMDGPU] Avoid redundant calls to numBits in AMDGPUCodeGenPrepare::replaceMulWithMul24().
Abinav Puthan Purayil via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 15 07:20:00 PDT 2021
Author: Abinav Puthan Purayil
Date: 2021-10-15T19:49:44+05:30
New Revision: de3038400b163d91dca743210ce9b10001411208
URL: https://github.com/llvm/llvm-project/commit/de3038400b163d91dca743210ce9b10001411208
DIFF: https://github.com/llvm/llvm-project/commit/de3038400b163d91dca743210ce9b10001411208.diff
LOG: [AMDGPU] Avoid redundant calls to numBits in AMDGPUCodeGenPrepare::replaceMulWithMul24().
isU24() and isI24() call numBitsUnsigned() and numBitsSigned() to make their
decisions. This change replaces them with direct calls to those bit-count
helpers so that their results can be reused for the > 32 bit-width checks.
Differential Revision: https://reviews.llvm.org/D111864
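
The new code folds the bit-count queries into the condition itself, relying on
&& short-circuiting so each count is computed at most once while its value
stays available for the wide-destination check. Below is a minimal standalone
sketch of that pattern, using a hypothetical numBits() helper in place of the
pass's numBitsUnsigned(); it is an illustration, not code from the patch.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for numBitsUnsigned(): the number of bits needed to
// represent V (a rough analogue of what known-bits analysis provides).
static unsigned numBits(uint64_t V) {
  unsigned Bits = 0;
  for (; V; V >>= 1)
    ++Bits;
  return Bits;
}

// Sketch of the caching pattern: the bit counts are computed at most once,
// inside the condition, and reused for the wide-destination check instead of
// being recomputed as a separate isU24()-style predicate would.
static bool canUseMulU24(uint64_t LHS, uint64_t RHS, unsigned Size) {
  unsigned LHSBits = 0, RHSBits = 0;
  if ((LHSBits = numBits(LHS)) <= 24 && (RHSBits = numBits(RHS)) <= 24) {
    // mul24 yields only the low-order 32 bits; a wider destination would
    // observe a truncated product if it needs more than 32 bits.
    if (Size > 32 && LHSBits + RHSBits > 32)
      return false;
    return true;
  }
  return false;
}

int main() {
  std::printf("%d\n", canUseMulU24(0xFFFFu, 0xFFFFu, 64));     // 1: 16 + 16 <= 32
  std::printf("%d\n", canUseMulU24(0xFFFFFFu, 0xFFFFFFu, 64)); // 0: 24 + 24 > 32
  return 0;
}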
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 875bc8ef4ff98..ba0afbc72b24d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -151,8 +151,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
- bool isI24(Value *V, unsigned ScalarSize) const;
- bool isU24(Value *V, unsigned ScalarSize) const;
/// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
/// SelectionDAG has an issue where an and asserting the bits are known
@@ -454,16 +452,6 @@ unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
}
-bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
- return ScalarSize >= 24 && // Types less than 24-bit should be treated
- // as unsigned 24-bit values.
- numBitsSigned(V, ScalarSize) < 24;
-}
-
-bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
- return numBitsUnsigned(V, ScalarSize) <= 24;
-}
-
static void extractValues(IRBuilder<> &Builder,
SmallVectorImpl<Value *> &Values, Value *V) {
auto *VT = dyn_cast<FixedVectorType>(V->getType());
@@ -509,23 +497,25 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
- if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
+ unsigned LHSBits = 0, RHSBits = 0;
+
+ if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {
// The mul24 instruction yields the low-order 32 bits. If the original
// result and the destination is wider than 32 bits, the mul24 would
// truncate the result.
- if (Size > 32 &&
- numBitsUnsigned(LHS, Size) + numBitsUnsigned(RHS, Size) > 32) {
+ if (Size > 32 && LHSBits + RHSBits > 32)
return false;
- }
IntrID = Intrinsic::amdgcn_mul_u24;
- } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
+ } else if (ST->hasMulI24() &&
+ (LHSBits = numBitsSigned(LHS, Size)) < 24 &&
+ (RHSBits = numBitsSigned(RHS, Size)) < 24) {
// The original result is positive if its destination is wider than 32 bits
// and its highest set bit is at bit 31. Generating mul24 and sign-extending
// it would yield a negative value.
- if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 30) {
+ if (Size > 32 && LHSBits + RHSBits > 30)
return false;
- }
IntrID = Intrinsic::amdgcn_mul_i24;
} else
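
For context on the two thresholds above (my own reading, not text from the
commit): an unsigned product of operands needing LHSBits and RHSBits bits
needs at most LHSBits + RHSBits bits, so the low 32 bits that mul24 produces
are only sufficient for a wider destination when that sum is at most 32; the
signed path is stricter (sum at most 30) so that the 32-bit intermediate keeps
bit 31 clear and the sign extension cannot turn a positive wide result
negative. A small illustration of the unsigned bound:

#include <cassert>
#include <cstdint>

int main() {
  // 20-bit * 12-bit operands: 20 + 12 <= 32, so the product still fits in
  // the low 32 bits that mul24 would produce.
  uint64_t A = (1ull << 20) - 1, B = (1ull << 12) - 1;
  assert(A * B <= UINT32_MAX);

  // 20-bit * 13-bit operands: 20 + 13 > 32, so a >32-bit destination could
  // see a truncated value, which is exactly the case the check rejects.
  uint64_t C = (1ull << 20) - 1, D = (1ull << 13) - 1;
  assert(C * D > UINT32_MAX);
  return 0;
}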