[llvm] SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine (PR #92219)

Thu May 16 00:03:02 PDT 2024

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/92219

>From f71d24ccd29830f5f5c601f4927ddddc927f213e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 13 May 2024 22:58:20 +0200
Subject: [PATCH 1/4] SimplifyLibCalls: Emit vector ldexp intrinsics in
 exp2->ldexp combine

---
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 21 +++++++-----
 llvm/test/Transforms/InstCombine/exp2-1.ll    | 13 +++-----
 .../Transforms/InstCombine/exp2-to-ldexp.ll   | 32 +++++++++++++------
 .../test/Transforms/InstCombine/pow_fp_int.ll |  4 +--
 4 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 9cb8e20b4806f..cfcf517cf555a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2000,11 +2000,16 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
     Value *Op = cast<Instruction>(I2F)->getOperand(0);
     // Make sure that the exponent fits inside an "int" of size DstWidth,
     // thus avoiding any range issues that FP has not.
-    unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
-    if (BitWidth < DstWidth ||
-        (BitWidth == DstWidth && isa<SIToFPInst>(I2F)))
-      return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getIntNTy(DstWidth))
-                                  : B.CreateZExt(Op, B.getIntNTy(DstWidth));
+    unsigned BitWidth =
+        Op->getType()->getScalarType()->getPrimitiveSizeInBits();
+    if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
+      Type *IntTy = B.getIntNTy(DstWidth);
+      if (VectorType *VT = dyn_cast<VectorType>(I2F->getType()))
+        IntTy = VectorType::get(IntTy, VT->getElementCount());
+
+      return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)
+                                  : B.CreateZExt(Op, IntTy);
+    }
   }
 
   return nullptr;
@@ -2377,10 +2382,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
       hasFloatVersion(M, Name))
     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
+  const bool UseIntrinsic = CI->doesNotAccessMemory();
   // Bail out for vectors because the code below only expects scalars.
-  // TODO: This could be allowed if we had a ldexp intrinsic (D14327).
   Type *Ty = CI->getType();
-  if (Ty->isVectorTy())
+  if (!UseIntrinsic && Ty->isVectorTy())
     return Ret;
 
   // exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= IntSize
@@ -2393,7 +2398,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
 
       // TODO: Emitting the intrinsic should not depend on whether the libcall
       // is available.
-      if (CI->doesNotAccessMemory()) {
+      if (UseIntrinsic) {
         return copyFlags(*CI, B.CreateIntrinsic(Intrinsic::ldexp,
                                                 {Ty, Exp->getType()},
                                                 {One, Exp}, CI));
diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll
index 5bf70320d9ec4..2dff0b08ecf97 100644
--- a/llvm/test/Transforms/InstCombine/exp2-1.ll
+++ b/llvm/test/Transforms/InstCombine/exp2-1.ll
@@ -308,20 +308,15 @@ define float @sitofp_scalar_intrinsic_with_FMF(i8 %x) {
 
 define <2 x float> @sitofp_vector_intrinsic_with_FMF(<2 x i8> %x) {
 ; LDEXP32-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; LDEXP32-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; LDEXP32-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
+; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
 ; LDEXP32-NEXT:    ret <2 x float> [[R]]
 ;
 ; LDEXP16-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; LDEXP16-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; LDEXP16-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16>
+; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i16> [[TMP1]])
 ; LDEXP16-NEXT:    ret <2 x float> [[R]]
 ;
-; NOLDEXPF-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; NOLDEXPF-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; NOLDEXPF-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
-; NOLDEXPF-NEXT:    ret <2 x float> [[R]]
-;
 ; NOLDEXP-LABEL: @sitofp_vector_intrinsic_with_FMF(
 ; NOLDEXP-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
 ; NOLDEXP-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
index 3069ee65e238a..6e5be5a19d6da 100644
--- a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
+++ b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
@@ -39,11 +39,17 @@ define float @exp2_f32_sitofp_i8_flags(i8 %x) {
 }
 
 define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) {
-; CHECK-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
-; CHECK-SAME: <2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]])
-; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+; LDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
+; LDEXP-SAME: <2 x i8> [[X:%.*]]) {
+; LDEXP-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; LDEXP-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; LDEXP-NEXT:    ret <2 x float> [[EXP2]]
+;
+; NOLDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
+; NOLDEXP-SAME: <2 x i8> [[X:%.*]]) {
+; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
+; NOLDEXP-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]])
+; NOLDEXP-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %itofp = sitofp <2 x i8> %x to <2 x float>
   %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %itofp)
@@ -117,11 +123,17 @@ define fp128 @exp2_fp128_sitofp_i8(i8 %x) {
 }
 
 define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(<vscale x 4 x i8> %x) {
-; CHECK-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
-; CHECK-SAME: <vscale x 4 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[ITOFP:%.*]] = sitofp <vscale x 4 x i8> [[X]] to <vscale x 4 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[ITOFP]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[EXP2]]
+; LDEXP-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
+; LDEXP-SAME: <vscale x 4 x i8> [[X:%.*]]) {
+; LDEXP-NEXT:    [[TMP1:%.*]] = sext <vscale x 4 x i8> [[X]] to <vscale x 4 x i32>
+; LDEXP-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP1]])
+; LDEXP-NEXT:    ret <vscale x 4 x float> [[EXP2]]
+;
+; NOLDEXP-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
+; NOLDEXP-SAME: <vscale x 4 x i8> [[X:%.*]]) {
+; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <vscale x 4 x i8> [[X]] to <vscale x 4 x float>
+; NOLDEXP-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[ITOFP]])
+; NOLDEXP-NEXT:    ret <vscale x 4 x float> [[EXP2]]
 ;
   %itofp = sitofp <vscale x 4 x i8> %x to <vscale x 4 x float>
   %exp2 = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %itofp)
diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
index 9c1fa88f3183e..7b194b3f8925f 100644
--- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll
+++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
@@ -530,8 +530,8 @@ define double @powf_exp_const2_int_no_fast(double %base) {
 define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %s = sitofp <2 x i8> %x to <2 x float>

>From 1dda52d6ee5c3ecae52e6767364d6f16128a7558 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 15 May 2024 09:23:24 +0200
Subject: [PATCH 2/4] Use getWithNewBitWidth

---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index cfcf517cf555a..5aa3cd5364677 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2003,10 +2003,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
     unsigned BitWidth =
         Op->getType()->getScalarType()->getPrimitiveSizeInBits();
     if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
-      Type *IntTy = B.getIntNTy(DstWidth);
-      if (VectorType *VT = dyn_cast<VectorType>(I2F->getType()))
-        IntTy = VectorType::get(IntTy, VT->getElementCount());
-
+      Type *IntTy = Op->getType()->getWithNewBitWidth(DstWidth);
       return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)
                                   : B.CreateZExt(Op, IntTy);
     }

>From accf6369d78dbf04666359db174974b7abb11ccf Mon Sep 17 00:00:00 2001
From: Matt Arsenault <arsenm2 at gmail.com>
Date: Thu, 16 May 2024 08:58:25 +0200
Subject: [PATCH 3/4] Use getScalarSizeInBits

Co-authored-by: Nikita Popov <github at npopov.com>
---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5aa3cd5364677..ec8c9073a3fc6 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2001,7 +2001,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
     // Make sure that the exponent fits inside an "int" of size DstWidth,
     // thus avoiding any range issues that FP has not.
     unsigned BitWidth =
-        Op->getType()->getScalarType()->getPrimitiveSizeInBits();
+        Op->getType()->getScalarSizeInBits();
     if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
       Type *IntTy = Op->getType()->getWithNewBitWidth(DstWidth);
       return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)

>From fddbef6d0659a9f4053c51b39de1cec6a6cdeec6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <arsenm2 at gmail.com>
Date: Thu, 16 May 2024 09:02:54 +0200
Subject: [PATCH 4/4] Fix clang-format error

---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index ec8c9073a3fc6..21ec07ae0221d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2000,8 +2000,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
     Value *Op = cast<Instruction>(I2F)->getOperand(0);
     // Make sure that the exponent fits inside an "int" of size DstWidth,
     // thus avoiding any range issues that FP has not.
-    unsigned BitWidth =
-        Op->getType()->getScalarSizeInBits();
+    unsigned BitWidth = Op->getType()->getScalarSizeInBits();
     if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
       Type *IntTy = Op->getType()->getWithNewBitWidth(DstWidth);
       return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)