[llvm] 454256e - [AMDGPU] Correct the known bits calculation for MUL_I24.

Fri Jan 14 08:55:26 PST 2022

Author: Craig Topper
Date: 2022-01-14T08:54:54-08:00
New Revision: 454256ef4f89e9d6a23a4b0f5bce52e0acafa755

URL: https://github.com/llvm/llvm-project/commit/454256ef4f89e9d6a23a4b0f5bce52e0acafa755
DIFF: https://github.com/llvm/llvm-project/commit/454256ef4f89e9d6a23a4b0f5bce52e0acafa755.diff

LOG: [AMDGPU] Correct the known bits calculation for MUL_I24.

I'm not entirely sure, but based on how ComputeNumSignBits handles
ISD::MUL, I believe this code was miscounting the number of sign
bits.

As an example of an incorrect result let's say that countMinSignBits
returned 1 for the left hand side and 24 for the right hand side.
LHSValBits would be 23 and RHSValBits would be 0 and the sum would
be 23. This would cause the code to set 9 high bits as zero/one. Now
suppose the real values for the left side is 0x800000 and the right
hand side is 0xffffff. The product is 0x00800000 which has 8 sign bits
not 9.

The number of valid bits for the left and right operands is now
the number of non-sign bits + 1. If the sum of the valid bits of
the left and right sides exceeds 32, then the result may overflow and we
can't say anything about the sign of the result. If the sum is 32
or less then it won't overflow and we know the result has at least
1 sign bit.

For the previous example, the code will now calculate the left
side valid bits as 24 and the right side as 1. The sum will be 25
and the sign bits will be 32 - 25 + 1 which is 8, the correct value.

Differential Revision: https://reviews.llvm.org/D116469

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index cec7458eb9c29..63e8c85b84137 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4626,11 +4626,12 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
     RHSKnown = RHSKnown.trunc(24);
 
     if (Opc == AMDGPUISD::MUL_I24) {
-      unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
-      unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
-      unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
-      if (MaxValBits >= 32)
+      unsigned LHSValBits = LHSKnown.countMaxSignificantBits();
+      unsigned RHSValBits = RHSKnown.countMaxSignificantBits();
+      unsigned MaxValBits = LHSValBits + RHSValBits;
+      if (MaxValBits > 32)
         break;
+      unsigned SignBits = 32 - MaxValBits + 1;
       bool LHSNegative = LHSKnown.isNegative();
       bool LHSNonNegative = LHSKnown.isNonNegative();
       bool LHSPositive = LHSKnown.isStrictlyPositive();
@@ -4639,16 +4640,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
       bool RHSPositive = RHSKnown.isStrictlyPositive();
 
       if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
-        Known.Zero.setHighBits(32 - MaxValBits);
+        Known.Zero.setHighBits(SignBits);
       else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
-        Known.One.setHighBits(32 - MaxValBits);
+        Known.One.setHighBits(SignBits);
     } else {
-      unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
-      unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
-      unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+      unsigned LHSValBits = LHSKnown.countMaxActiveBits();
+      unsigned RHSValBits = RHSKnown.countMaxActiveBits();
+      unsigned MaxValBits = LHSValBits + RHSValBits;
       if (MaxValBits >= 32)
         break;
-      Known.Zero.setHighBits(32 - MaxValBits);
+      Known.Zero.setBitsFrom(MaxValBits);
     }
     break;
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
index ce5d952408dd1..c2d520e4fe14f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
@@ -33,7 +33,11 @@ define i32 @f(i32 %x, i32 %y) {
 ; GCN-LABEL: f:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_mov_b32 s4, 0xffff80
+; GCN-NEXT:    v_or_b32_e32 v0, s4, v0
+; GCN-NEXT:    v_or_b32_e32 v1, s4, v1
+; GCN-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
+; GCN-NEXT:    v_lshrrev_b32_e32 v0, 14, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %xx = or i32 %x, -128 ; 0xffffff80
   %yy = or i32 %y, -128 ; 0xffffff80