[llvm] de30384 - [AMDGPU] Avoid redundant calls to numBits in AMDGPUCodeGenPrepare::replaceMulWithMul24().
Abinav Puthan Purayil via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 15 07:20:00 PDT 2021
Author: Abinav Puthan Purayil
Date: 2021-10-15T19:49:44+05:30
New Revision: de3038400b163d91dca743210ce9b10001411208
URL: https://github.com/llvm/llvm-project/commit/de3038400b163d91dca743210ce9b10001411208
DIFF: https://github.com/llvm/llvm-project/commit/de3038400b163d91dca743210ce9b10001411208.diff
LOG: [AMDGPU] Avoid redundant calls to numBits in AMDGPUCodeGenPrepare::replaceMulWithMul24().
isU24() and isI24() call numBitsUnsigned() and numBitsSigned() to make their
decisions. This change replaces them with direct calls to those bit-count
helpers so that their results can be reused for the > 32 bit-width checks.
Differential Revision: https://reviews.llvm.org/D111864
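
The new code folds the bit-count queries into the condition itself, relying on
&& short-circuiting so each count is computed at most once while its value
stays available for the wide-destination check. Below is a minimal standalone
sketch of that pattern, using a hypothetical numBits() helper in place of the
pass's numBitsUnsigned(); it is an illustration, not code from the patch.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for numBitsUnsigned(): the number of bits needed to
// represent V (a rough analogue of what known-bits analysis provides).
static unsigned numBits(uint64_t V) {
  unsigned Bits = 0;
  for (; V; V >>= 1)
    ++Bits;
  return Bits;
}

// Sketch of the caching pattern: the bit counts are computed at most once,
// inside the condition, and reused for the wide-destination check instead of
// being recomputed as a separate isU24()-style predicate would.
static bool canUseMulU24(uint64_t LHS, uint64_t RHS, unsigned Size) {
  unsigned LHSBits = 0, RHSBits = 0;
  if ((LHSBits = numBits(LHS)) <= 24 && (RHSBits = numBits(RHS)) <= 24) {
    // mul24 yields only the low-order 32 bits; a wider destination would
    // observe a truncated product if it needs more than 32 bits.
    if (Size > 32 && LHSBits + RHSBits > 32)
      return false;
    return true;
  }
  return false;
}

int main() {
  std::printf("%d\n", canUseMulU24(0xFFFFu, 0xFFFFu, 64));     // 1: 16 + 16 <= 32
  std::printf("%d\n", canUseMulU24(0xFFFFFFu, 0xFFFFFFu, 64)); // 0: 24 + 24 > 32
  return 0;
}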
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 875bc8ef4ff98..ba0afbc72b24d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -151,8 +151,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
- bool isI24(Value *V, unsigned ScalarSize) const;
- bool isU24(Value *V, unsigned ScalarSize) const;
/// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
/// SelectionDAG has an issue where an and asserting the bits are known
@@ -454,16 +452,6 @@ unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
}
-bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
- return ScalarSize >= 24 && // Types less than 24-bit should be treated
- // as unsigned 24-bit values.
- numBitsSigned(V, ScalarSize) < 24;
-}
-
-bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
- return numBitsUnsigned(V, ScalarSize) <= 24;
-}
-
static void extractValues(IRBuilder<> &Builder,
SmallVectorImpl<Value *> &Values, Value *V) {
auto *VT = dyn_cast<FixedVectorType>(V->getType());
@@ -509,23 +497,25 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
- if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
+ unsigned LHSBits = 0, RHSBits = 0;
+
+ if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {
// The mul24 instruction yields the low-order 32 bits. If the original
// result and the destination is wider than 32 bits, the mul24 would
// truncate the result.
- if (Size > 32 &&
- numBitsUnsigned(LHS, Size) + numBitsUnsigned(RHS, Size) > 32) {
+ if (Size > 32 && LHSBits + RHSBits > 32)
return false;
- }
IntrID = Intrinsic::amdgcn_mul_u24;
- } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
+ } else if (ST->hasMulI24() &&
+ (LHSBits = numBitsSigned(LHS, Size)) < 24 &&
+ (RHSBits = numBitsSigned(RHS, Size)) < 24) {
// The original result is positive if its destination is wider than 32 bits
// and its highest set bit is at bit 31. Generating mul24 and sign-extending
// it would yield a negative value.
- if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 30) {
+ if (Size > 32 && LHSBits + RHSBits > 30)
return false;
- }
IntrID = Intrinsic::amdgcn_mul_i24;
} else
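
For context on the two thresholds above (my own reading, not text from the
commit): an unsigned product of operands needing LHSBits and RHSBits bits
needs at most LHSBits + RHSBits bits, so the low 32 bits that mul24 produces
are only sufficient for a wider destination when that sum is at most 32; the
signed path is stricter (sum at most 30) so that the 32-bit intermediate keeps
bit 31 clear and the sign extension cannot turn a positive wide result
negative. A small illustration of the unsigned bound:

#include <cassert>
#include <cstdint>

int main() {
  // 20-bit * 12-bit operands: 20 + 12 <= 32, so the product still fits in
  // the low 32 bits that mul24 would produce.
  uint64_t A = (1ull << 20) - 1, B = (1ull << 12) - 1;
  assert(A * B <= UINT32_MAX);

  // 20-bit * 13-bit operands: 20 + 13 > 32, so a >32-bit destination could
  // see a truncated value, which is exactly the case the check rejects.
  uint64_t C = (1ull << 20) - 1, D = (1ull << 13) - 1;
  assert(C * D > UINT32_MAX);
  return 0;
}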