[llvm] r363811 - [ConstantFolding] Add constant folding for smul.fix and smul.fix.sat

Bjorn Pettersson via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 19 07:28:03 PDT 2019


Author: bjope
Date: Wed Jun 19 07:28:03 2019
New Revision: 363811

URL: http://llvm.org/viewvc/llvm-project?rev=363811&view=rev
Log:
[ConstantFolding] Add constant folding for smul.fix and smul.fix.sat

Summary:
This patch teaches ConstantFolding to constant fold
both scalar and vector variants of llvm.smul.fix and
llvm.smul.fix.sat.

As described in the LangRef, rounding is unspecified for
these intrinsics. If the result cannot be represented
exactly, the default behavior in ConstantFolding is to
round down towards negative infinity. If a target prefers
a different rounding, some kind of target hook would be
needed (the same strategy as used by the SelectionDAG
legalizer).
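
For reference, a minimal standalone sketch of the folded semantics
(not taken from the patch; it assumes 32-bit operands and plain C++
integer arithmetic instead of APInt):

  #include <algorithm>
  #include <cstdint>

  // Multiply in double width, shift right by the scale (assuming the
  // usual arithmetic right shift, which rounds towards negative
  // infinity), optionally clamp to the 32-bit signed range, then
  // truncate back to the original width.
  int32_t fold_smul_fix(int32_t LHS, int32_t RHS, unsigned Scale,
                        bool Saturate) {
    int64_t Product = (static_cast<int64_t>(LHS) * RHS) >> Scale;
    if (Saturate)
      Product = std::min<int64_t>(std::max<int64_t>(Product, INT32_MIN),
                                  INT32_MAX);
    return static_cast<int32_t>(Product);
  }

With scale 31 this folds 1073741824 (0.5) times 1073741824 to
536870912 (0.25), matching the scalar tests added below.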

Reviewers: nikic, leonardchan, RKSimon

Reviewed By: leonardchan

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63385

Added:
    llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll
    llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll
Modified:
    llvm/trunk/lib/Analysis/ConstantFolding.cpp

Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=363811&r1=363810&r2=363811&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original)
+++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Wed Jun 19 07:28:03 2019
@@ -1422,6 +1422,8 @@ bool llvm::canConstantFoldCallTo(const C
   case Intrinsic::uadd_sat:
   case Intrinsic::ssub_sat:
   case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
   case Intrinsic::bitreverse:
@@ -2198,6 +2200,43 @@ static Constant *ConstantFoldScalarCall3
     }
   }
 
+  if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+    if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+      if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+        switch (IntrinsicID) {
+        default: break;
+        case Intrinsic::smul_fix:
+        case Intrinsic::smul_fix_sat: {
+          // This code performs rounding towards negative infinity in case the
+          // result cannot be represented exactly for the given scale. Targets
+          // that do care about rounding should use a target hook for specifying
+          // how rounding should be done, and provide their own folding to be
+          // consistent with rounding. This is the same approach as used by
+          // DAGTypeLegalizer::ExpandIntRes_MULFIX.
+          APInt Lhs = Op1->getValue();
+          APInt Rhs = Op2->getValue();
+          unsigned Scale = Op3->getValue().getZExtValue();
+          unsigned Width = Lhs.getBitWidth();
+          assert(Scale < Width && "Illegal scale.");
+          unsigned ExtendedWidth = Width * 2;
+          APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+                           Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
+          if (IntrinsicID == Intrinsic::smul_fix_sat) {
+            APInt MaxValue =
+              APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+            APInt MinValue =
+              APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+            Product = APIntOps::smin(Product, MaxValue);
+            Product = APIntOps::smax(Product, MinValue);
+          }
+          return ConstantInt::get(Ty->getContext(),
+                                  Product.sextOrTrunc(Width));
+        }
+        }
+      }
+    }
+  }
+
   if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
     const APInt *C0, *C1, *C2;
     if (!getConstIntOrUndef(Operands[0], C0) ||
@@ -2307,6 +2346,13 @@ static Constant *ConstantFoldVectorCall(
         Lane[J] = Operands[J];
         continue;
       }
+      // These intrinsics use a scalar type for their third argument.
+      if (J == 2 &&
+          (IntrinsicID == Intrinsic::smul_fix ||
+           IntrinsicID == Intrinsic::smul_fix_sat)) {
+        Lane[J] = Operands[J];
+        continue;
+      }
 
       Constant *Agg = Operands[J]->getAggregateElement(I);
       if (!Agg)
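
Two worked examples at the i3 element width used by the saturating
vector tests below: -4 * -4 with scale 1 gives 16, and 16 >> 1 = 8
exceeds the i3 maximum of 3, so smul.fix.sat clamps that lane to 3;
-1 * 3 with scale 2 gives -3, and the arithmetic shift rounds down
towards negative infinity, so that lane becomes -1 rather than 0.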

Added: llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll?rev=363811&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll (added)
+++ llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll Wed Jun 19 07:28:03 2019
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.sat.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_sat_i32_0() {
+; CHECK-LABEL: @test_smul_fix_sat_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.sat.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.sat.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_sat_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 0, i3 -4, i3 -4, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 3, i3 2, i3 0, i3 -2, i3 -4, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3> <i3 2, i3 1, i3 1, i3 0, i3 0, i3 -1, i3 -1, i3 -2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3> <i3 1, i3 0, i3 0, i3 0, i3 0, i3 -1, i3 -1, i3 -1>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -3, i3 0, i3 3, i3 3, i3 3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 -4, i3 -3, i3 -2, i3 0, i3 1, i3 3, i3 3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3> <i3 -3, i3 -3, i3 -2, i3 -1, i3 0, i3 0, i3 1, i3 2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 2)
+  ret <8 x i3> %r
+}
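
For the plain (non-saturating) smul.fix tests below, out-of-range
products simply wrap: -3 * -4 with scale 1 gives 12, 12 >> 1 = 6,
and truncating 6 (0b110) to three bits yields -2.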

Added: llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll?rev=363811&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll (added)
+++ llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll Wed Jun 19 07:28:03 2019
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_i32_0() {
+; CHECK-LABEL: @test_smul_fix_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3> <i3 0, i3 -4, i3 0, i3 -4, i3 0, i3 -4, i3 0, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3> <i3 0, i3 -2, i3 -4, i3 2, i3 0, i3 -2, i3 -4, i3 2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3> <i3 2, i3 1, i3 1, i3 0, i3 0, i3 -1, i3 -1, i3 -2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3> <i3 1, i3 0, i3 0, i3 0, i3 0, i3 -1, i3 -1, i3 -1>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 -1, i3 2, i3 -3, i3 0, i3 3, i3 -2, i3 1>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3> <i3 2, i3 3, i3 -3, i3 -2, i3 0, i3 1, i3 3, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3> <i3 -3, i3 -3, i3 -2, i3 -1, i3 0, i3 0, i3 1, i3 2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 2)
+  ret <8 x i3> %r
+}



