[llvm] [InstCombine] Add support for transforming manual signed mul overflows (PR #93370)

Mon Jul 22 07:55:43 PDT 2024

https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/93370

>From a3818da726fd1e1b8ed1821e8c4ae8c2a17d4cc8 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 24 May 2024 23:54:06 -0400
Subject: [PATCH 1/2] Pre-commit tests (NFC)

---
 .../Transforms/InstCombine/overflow-mul.ll    | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll
index 6b5a65c03ee10..3bc3c290ca8c9 100644
--- a/llvm/test/Transforms/InstCombine/overflow-mul.ll
+++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll
@@ -343,3 +343,42 @@ define i32 @extra_and_use_mask_too_large(i32 %x, i32 %y) {
   %retval = zext i1 %overflow to i32
   ret i32 %retval
 }
+
+define i32 @smul(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul(
+; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT:    [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[CONV3]]
+;
+  %conv = sext i32 %a to i64
+  %conv1 = sext i32 %b to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %1 = add nsw i64 %mul, -2147483648
+  %2 = icmp ult i64 %1, -4294967296
+  %conv3 = zext i1 %2 to i32
+  ret i32 %conv3
+}
+
+define i32 @smul2(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul2(
+; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[MUL]], 2147483647
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 4294967295
+; CHECK-NEXT:    [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[CONV3]]
+;
+  %conv = sext i32 %a to i64
+  %conv1 = sext i32 %b to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %cmp = icmp sle i64 %mul, 2147483647
+  %cmp2 = icmp sgt i64 %mul, -2147483648
+  %1 = select i1 %cmp, i1 %cmp2, i1 false
+  %conv3 = zext i1 %1 to i32
+  ret i32 %conv3
+}

>From d3ffd99bfacfa27ab6e663bf11a2973187940531 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 24 May 2024 23:50:47 -0400
Subject: [PATCH 2/2] [InstCombine] Add support for transforming manual checks
 for signed mul overflows

Alive2 Proof:
https://alive2.llvm.org/ce/z/m-kd7-
---
 .../InstCombine/InstCombineCompares.cpp       | 176 ++++++++++++++++++
 .../Transforms/InstCombine/overflow-mul.ll    |  19 +-
 2 files changed, 183 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index abadf54a96767..1224ddd7ee6e5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6286,6 +6286,172 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
   return ExtractValueInst::Create(Call, 1);
 }
 
+/// Recognize and process idiom involving test for multiplication
+/// overflow.
+///
+/// The caller has matched a pattern of the form:
+///   I = cmp u add (mul(sext A, sext B), V, W
+/// The function checks if this is a test for overflow and if so replaces
+/// multiplication with call to 'mul.with.overflow' intrinsic.
+///
+/// \param I Compare instruction.
+/// \param MulVal Result of 'mult' instruction.  It is one of the arguments of
+///               the compare instruction.  Must be of integer type.
+/// \param OtherVal The other argument of compare instruction.
+/// \returns Instruction which must replace the compare instruction, NULL if no
+///          replacement required.
+static Instruction *processSMulSExtIdiom(ICmpInst &I, Value *MulVal,
+                                         const APInt *AddVal,
+                                         const APInt *OtherVal,
+                                         InstCombinerImpl &IC) {
+  // Don't bother doing this transformation for pointers, don't do it for
+  // vectors.
+  if (!isa<IntegerType>(MulVal->getType()))
+    return nullptr;
+
+  auto *MulInstr = dyn_cast<Instruction>(MulVal);
+  if (!MulInstr)
+    return nullptr;
+  assert(MulInstr->getOpcode() == Instruction::Mul);
+
+  auto *LHS = cast<SExtInst>(MulInstr->getOperand(0)),
+       *RHS = cast<SExtInst>(MulInstr->getOperand(1));
+  assert(LHS->getOpcode() == Instruction::SExt);
+  assert(RHS->getOpcode() == Instruction::SExt);
+  Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
+
+  // Calculate type and width of the result produced by mul.with.overflow.
+  Type *TyA = A->getType(), *TyB = B->getType();
+  unsigned WidthA = TyA->getPrimitiveSizeInBits(),
+           WidthB = TyB->getPrimitiveSizeInBits();
+  unsigned MulWidth;
+  Type *MulType;
+  if (WidthB > WidthA) {
+    MulWidth = WidthB;
+    MulType = TyB;
+  } else {
+    MulWidth = WidthA;
+    MulType = TyA;
+  }
+
+  // In order to replace the original mul with a narrower mul.with.overflow,
+  // all uses must ignore upper bits of the product.  The number of used low
+  // bits must be not greater than the width of mul.with.overflow.
+  if (MulVal->hasNUsesOrMore(2))
+    for (User *U : MulVal->users()) {
+      if (U == &I)
+        continue;
+      if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+        // Check if truncation ignores bits above MulWidth.
+        unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
+        if (TruncWidth > MulWidth)
+          return nullptr;
+      } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+        // Check if AND ignores bits above MulWidth.
+        if (BO->getOpcode() != Instruction::And)
+          return nullptr;
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+          const APInt &CVal = CI->getValue();
+          if (CVal.getBitWidth() - CVal.countl_zero() > MulWidth)
+            return nullptr;
+        } else {
+          // In this case we could have the operand of the binary operation
+          // being defined in another block, and performing the replacement
+          // could break the dominance relation.
+          return nullptr;
+        }
+      } else {
+        // Other uses prohibit this transformation.
+        return nullptr;
+      }
+    }
+
+  // Recognize patterns
+  bool IsInverse = false;
+  switch (I.getPredicate()) {
+  case ICmpInst::ICMP_ULT: {
+    // Recognize pattern:
+    //   mulval = mul(sext A, sext B)
+    //   addval = add (mulval, min)
+    //   cmp ult addval, -min * 2 + 1
+    APInt MinVal = APInt::getSignedMinValue(MulWidth);
+    MinVal = MinVal.sext(OtherVal->getBitWidth());
+    APInt MinMinVal = APInt::getSignedMinValue(MulWidth + 1);
+    MinMinVal = MinMinVal.sext(OtherVal->getBitWidth());
+    if (MinVal.eq(*AddVal) && MinMinVal.eq(*OtherVal))
+      break; // Recognized
+
+    // Recognize pattern:
+    //   mulval = mul(sext A, sext B)
+    //   addval = add (mulval, signedMax)
+    //   cmp ult addval, unsignedMax
+    APInt MaxVal = APInt::getSignedMaxValue(MulWidth);
+    MaxVal = MaxVal.zext(OtherVal->getBitWidth()) + 1;
+    APInt MaxMaxVal = APInt::getMaxValue(MulWidth);
+    MaxMaxVal = MaxMaxVal.zext(OtherVal->getBitWidth()) + 1;
+    if (MaxVal.eq(*AddVal) && MaxMaxVal.eq(*OtherVal)) {
+      IsInverse = true;
+      break; // Recognized
+    }
+    return nullptr;
+  }
+
+  default:
+    return nullptr;
+  }
+
+  InstCombiner::BuilderTy &Builder = IC.Builder;
+  Builder.SetInsertPoint(MulInstr);
+
+  // Replace: mul(sext A, sext B) --> mul.with.overflow(A, B)
+  Value *MulA = A, *MulB = B;
+  if (WidthA < MulWidth)
+    MulA = Builder.CreateSExt(A, MulType);
+  if (WidthB < MulWidth)
+    MulB = Builder.CreateSExt(B, MulType);
+  Function *F = Intrinsic::getDeclaration(
+      I.getModule(), Intrinsic::smul_with_overflow, MulType);
+  CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "smul");
+  IC.addToWorklist(MulInstr);
+
+  // If there are uses of mul result other than the comparison, we know that
+  // they are truncation or binary AND. Change them to use result of
+  // mul.with.overflow and adjust properly mask/size.
+  if (MulVal->hasNUsesOrMore(2)) {
+    Value *Mul = Builder.CreateExtractValue(Call, 0, "smul.value");
+    for (User *U : make_early_inc_range(MulVal->users())) {
+      if (U == &I)
+        continue;
+      if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+        if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
+          IC.replaceInstUsesWith(*TI, Mul);
+        else
+          TI->setOperand(0, Mul);
+      } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+        assert(BO->getOpcode() == Instruction::And);
+        // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
+        ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+        APInt ShortMask = CI->getValue().trunc(MulWidth);
+        Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
+        Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
+        IC.replaceInstUsesWith(*BO, Zext);
+      } else {
+        llvm_unreachable("Unexpected Binary operation");
+      }
+      IC.addToWorklist(cast<Instruction>(U));
+    }
+  }
+
+  // The original icmp gets replaced with the overflow value, maybe inverted
+  // depending on predicate.
+  if (IsInverse) {
+    Value *Res = Builder.CreateExtractValue(Call, 1);
+    return BinaryOperator::CreateNot(Res);
+  }
+
+  return ExtractValueInst::Create(Call, 1);
+}
+
 /// When performing a comparison against a constant, it is possible that not all
 /// the bits in the LHS are demanded. This helper method computes the mask that
 /// IS demanded.
@@ -7531,6 +7697,16 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
         return R;
     }
 
+    // (sext X) * (sext Y)  --> llvm.smul.with.overflow.
+    const APInt *C1;
+    if (match(Op0, m_Add(m_NSWMul(m_SExt(m_Value(X)), m_SExt(m_Value(Y))),
+                         m_APInt(C))) &&
+        match(Op1, m_APInt(C1))) {
+      if (Instruction *R = processSMulSExtIdiom(
+              I, cast<Instruction>(Op0)->getOperand(0), C, C1, *this))
+        return R;
+    }
+
     // Signbit test folds
     // Fold (X u>> BitWidth - 1 Pred ZExt(i1))  -->  X s< 0 Pred i1
     // Fold (X s>> BitWidth - 1 Pred SExt(i1))  -->  X s< 0 Pred i1
diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll
index 3bc3c290ca8c9..582697b3d81e8 100644
--- a/llvm/test/Transforms/InstCombine/overflow-mul.ll
+++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll
@@ -346,12 +346,9 @@ define i32 @extra_and_use_mask_too_large(i32 %x, i32 %y) {
 
 define i32 @smul(i32 %a, i32 %b) {
 ; CHECK-LABEL: @smul(
-; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
-; CHECK-NEXT:    [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    [[SMUL:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i1 } [[SMUL]], 1
+; CHECK-NEXT:    [[CONV3:%.*]] = zext i1 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV3]]
 ;
   %conv = sext i32 %a to i64
@@ -365,11 +362,9 @@ define i32 @smul(i32 %a, i32 %b) {
 
 define i32 @smul2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @smul2(
-; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[MUL]], 2147483647
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 4294967295
+; CHECK-NEXT:    [[SMUL:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i1 } [[SMUL]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[TMP1]], true
 ; CHECK-NEXT:    [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV3]]
 ;
@@ -377,7 +372,7 @@ define i32 @smul2(i32 %a, i32 %b) {
   %conv1 = sext i32 %b to i64
   %mul = mul nsw i64 %conv1, %conv
   %cmp = icmp sle i64 %mul, 2147483647
-  %cmp2 = icmp sgt i64 %mul, -2147483648
+  %cmp2 = icmp sge i64 %mul, -2147483648
   %1 = select i1 %cmp, i1 %cmp2, i1 false
   %conv3 = zext i1 %1 to i32
   ret i32 %conv3