[llvm] 0379263 - [AMDGPU] Fix width check for signed mul24 generation.

Fri Oct 15 06:28:21 PDT 2021

Author: Abinav Puthan Purayil
Date: 2021-10-15T18:53:41+05:30
New Revision: 0379263f23a667ff5823c96a3e5819f65eadd243

URL: https://github.com/llvm/llvm-project/commit/0379263f23a667ff5823c96a3e5819f65eadd243
DIFF: https://github.com/llvm/llvm-project/commit/0379263f23a667ff5823c96a3e5819f65eadd243.diff

LOG: [AMDGPU] Fix width check for signed mul24 generation.

This change fixes a case in which the highest set bit of the original
result is at bit 31 and sign-extending the mul24 for it would make the
result negative.

Differential Revision: https://reviews.llvm.org/D111823

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index ff25bbf84d64c..875bc8ef4ff98 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -509,10 +509,10 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
 
   Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
 
-  // TODO: Should this try to match mulhi24?
   if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
-    // The 24-bit mul intrinsics yields the low-order 32 bits. The result's bit
-    // width should not exceed 32 if `Size` > 32.
+    // The mul24 instruction yields the low-order 32 bits. If the original
+    // result and the destination is wider than 32 bits, the mul24 would
+    // truncate the result.
     if (Size > 32 &&
         numBitsUnsigned(LHS, Size) + numBitsUnsigned(RHS, Size) > 32) {
       return false;
@@ -520,7 +520,10 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
 
     IntrID = Intrinsic::amdgcn_mul_u24;
   } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
-    if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 31) {
+    // The original result is positive if its destination is wider than 32 bits
+    // and its highest set bit is at bit 31. Generating mul24 and sign-extending
+    // it would yield a negative value.
+    if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 30) {
       return false;
     }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
index 55a40333be8f7..fa14e968948af 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -239,6 +239,39 @@ define i64 @smul24_i64_2(i64 %lhs, i64 %rhs) {
   ret i64 %mul
 }
 
+define i64 @smul24_i64_3(i64 %lhs, i64 %rhs) {
+; SI-LABEL: @smul24_i64_3(
+; SI-NEXT:    [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
+; SI-NEXT:    [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
+; SI-NEXT:    [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
+; SI-NEXT:    [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
+; SI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
+; SI-NEXT:    ret i64 [[MUL]]
+;
+; VI-LABEL: @smul24_i64_3(
+; VI-NEXT:    [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
+; VI-NEXT:    [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
+; VI-NEXT:    [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
+; VI-NEXT:    [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
+; VI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
+; VI-NEXT:    ret i64 [[MUL]]
+;
+; DISABLED-LABEL: @smul24_i64_3(
+; DISABLED-NEXT:    [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
+; DISABLED-NEXT:    [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
+; DISABLED-NEXT:    [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
+; DISABLED-NEXT:    [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
+; DISABLED-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
+; DISABLED-NEXT:    ret i64 [[MUL]]
+;
+  %lhs.trunc = trunc i64 %lhs to i16
+  %lhs24 = sext i16 %lhs.trunc to i64
+  %rhs.trunc = trunc i64 %rhs to i17
+  %rhs24 = sext i17 %rhs.trunc to i64
+  %mul = mul i64 %lhs24, %rhs24
+  ret i64 %mul
+}
+
 define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
 ; SI-LABEL: @umul24_i64(
 ; SI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215