[llvm] PreISelIntrinsicLowering: Lower llvm.exp to a loop if scalable vec arg (PR #117568)
Stephen Long via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 4 11:19:55 PST 2024
https://github.com/steplong updated https://github.com/llvm/llvm-project/pull/117568
From 974cda322e9363506ad8557d131c4d0675adb5d5 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Fri, 22 Nov 2024 07:32:17 -0800
Subject: [PATCH 1/8] PreISelIntrinsicLowering: Lower llvm.exp to a loop if scalable vec arg
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 57 +++++++++++++++++++
.../PreISelIntrinsicLowering/expand-exp.ll | 23 ++++++++
2 files changed, 80 insertions(+)
create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 4a3d1673c2a7c1..74f54e43a8386f 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -335,6 +335,59 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
return Changed;
}
+static bool lowerExpIntrinsicToLoop(Module &M, Function &F, CallInst *CI) {
+ ScalableVectorType *ScalableTy =
+ dyn_cast<ScalableVectorType>(F.getArg(0)->getType());
+ if (!ScalableTy) {
+ return false;
+ }
+
+ BasicBlock *PreLoopBB = CI->getParent();
+ BasicBlock *PostLoopBB = nullptr;
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ PostLoopBB = PreLoopBB->splitBasicBlock(CI);
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
+ PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
+
+ // loop preheader
+ IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
+ Value *VScale = PreLoopBuilder.CreateVScale(
+ ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
+ Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+ ScalableTy->getMinNumElements());
+ Value *LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
+
+ // loop body
+ IRBuilder<> LoopBuilder(LoopBB);
+ Type *Int64Ty = LoopBuilder.getInt64Ty();
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
+ LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
+ PHINode *Vec = LoopBuilder.CreatePHI(ScalableTy, 2);
+ Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
+
+ Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
+ Function *Exp = Intrinsic::getOrInsertDeclaration(
+ &M, Intrinsic::exp, ScalableTy->getElementType());
+ Value *Res = LoopBuilder.CreateCall(Exp, Elem);
+ Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
+ Vec->addIncoming(NewVec, LoopBB);
+
+ Value *One = ConstantInt::get(Int64Ty, 1U);
+ Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
+ LoopIndex->addIncoming(NextLoopIndex, LoopBB);
+
+ Value *ExitCond =
+ LoopBuilder.CreateICmp(CmpInst::ICMP_EQ, NextLoopIndex, LoopEnd);
+ LoopBuilder.CreateCondBr(ExitCond, PostLoopBB, LoopBB);
+
+ CI->replaceAllUsesWith(NewVec);
+ CI->eraseFromParent();
+ return true;
+}
+
bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
bool Changed = false;
for (Function &F : M) {
@@ -453,6 +506,10 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
case Intrinsic::objc_sync_exit:
Changed |= lowerObjCCall(F, "objc_sync_exit");
break;
+ case Intrinsic::exp:
+ Changed |= forEachCall(
+ F, [&](CallInst *CI) { return lowerExpIntrinsicToLoop(M, F, CI); });
+ break;
}
}
return Changed;
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll
new file mode 100644
index 00000000000000..6ad181033f233a
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll
@@ -0,0 +1,23 @@
+; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s
+
+define <vscale x 4 x float> @softmax_kernel() {
+; CHECK-LABEL: define <vscale x 4 x float> @softmax_kernel(
+; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[LOOPEND:%.*]] = mul i64 [[VSCALE]], 4
+; CHECK-NEXT: br label %[[LOOPBODY:.*]]
+; CHECK: [[LOOPBODY]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %0 ], [ [[NEW_IDX:%.*]], %[[LOOPBODY]] ]
+; CHECK-NEXT: [[VEC:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %0 ], [ [[NEW_VEC:.*]], %[[LOOPBODY]] ]
+; CHECK-NEXT: [[ELEM:%.*]] = extractelement <vscale x 4 x float> [[VEC]], i64 [[IDX]]
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.exp.f32(float [[ELEM]])
+; CHECK-NEXT: [[NEW_VEC:%.*]] = insertelement <vscale x 4 x float> [[VEC]], float [[RES]], i64 [[IDX]]
+; CHECK-NEXT: [[NEW_IDX]] = add i64 [[IDX]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEW_IDX]], [[LOOPEND]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOPEXIT:.*]], label %[[LOOPBODY]]
+; CHECK: [[LOOPEXIT]]:
+; CHECK-NEXT: ret <vscale x 4 x float> [[NEW_VEC]]
+ %1 = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> zeroinitializer)
+ ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
From 782e904bcd8e93c4e370be774f7ca35f8fb32187 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Mon, 25 Nov 2024 09:17:17 -0800
Subject: [PATCH 2/8] Fix test
---
.../PreISelIntrinsicLowering/expand-exp.ll | 20 +++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll
index 6ad181033f233a..2d5a9673727d5b 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll
@@ -1,13 +1,15 @@
; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64"
-define <vscale x 4 x float> @softmax_kernel() {
-; CHECK-LABEL: define <vscale x 4 x float> @softmax_kernel(
+define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
+; CHECK-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[LOOPEND:%.*]] = mul i64 [[VSCALE]], 4
; CHECK-NEXT: br label %[[LOOPBODY:.*]]
; CHECK: [[LOOPBODY]]:
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %0 ], [ [[NEW_IDX:%.*]], %[[LOOPBODY]] ]
-; CHECK-NEXT: [[VEC:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %0 ], [ [[NEW_VEC:.*]], %[[LOOPBODY]] ]
+; CHECK-NEXT: [[VEC:%.*]] = phi <vscale x 4 x float> [ %input, %0 ], [ [[NEW_VEC:.*]], %[[LOOPBODY]] ]
; CHECK-NEXT: [[ELEM:%.*]] = extractelement <vscale x 4 x float> [[VEC]], i64 [[IDX]]
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.exp.f32(float [[ELEM]])
; CHECK-NEXT: [[NEW_VEC:%.*]] = insertelement <vscale x 4 x float> [[VEC]], float [[RES]], i64 [[IDX]]
@@ -16,8 +18,14 @@ define <vscale x 4 x float> @softmax_kernel() {
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOPEXIT:.*]], label %[[LOOPBODY]]
; CHECK: [[LOOPEXIT]]:
; CHECK-NEXT: ret <vscale x 4 x float> [[NEW_VEC]]
- %1 = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> zeroinitializer)
- ret <vscale x 4 x float> %1
+ %output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
+ ret <vscale x 4 x float> %output
}
-declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
+; CHECK: declare i64 @llvm.vscale.i64() #1
+; CHECK: declare float @llvm.exp.f32(float) #0
+declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0
+
+; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
From bca4b5d8283e298345760771fe5e2d4dd2b5e624 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Mon, 25 Nov 2024 11:28:30 -0800
Subject: [PATCH 3/8] Generalize helper function
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 74f54e43a8386f..1a02a08508d7a8 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -335,9 +335,9 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
return Changed;
}
-static bool lowerExpIntrinsicToLoop(Module &M, Function &F, CallInst *CI) {
+static bool lowerExpIntrinsicToLoop(Module &M, CallInst *CI) {
ScalableVectorType *ScalableTy =
- dyn_cast<ScalableVectorType>(F.getArg(0)->getType());
+ dyn_cast<ScalableVectorType>(CI->getArgOperand(0)->getType());
if (!ScalableTy) {
return false;
}
@@ -370,7 +370,7 @@ static bool lowerExpIntrinsicToLoop(Module &M, Function &F, CallInst *CI) {
Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
Function *Exp = Intrinsic::getOrInsertDeclaration(
- &M, Intrinsic::exp, ScalableTy->getElementType());
+ &M, CI->getIntrinsicID(), ScalableTy->getElementType());
Value *Res = LoopBuilder.CreateCall(Exp, Elem);
Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
Vec->addIncoming(NewVec, LoopBB);
@@ -508,7 +508,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
break;
case Intrinsic::exp:
Changed |= forEachCall(
- F, [&](CallInst *CI) { return lowerExpIntrinsicToLoop(M, F, CI); });
+ F, [&](CallInst *CI) { return lowerExpIntrinsicToLoop(M, CI); });
break;
}
}
From 1ff0016aff3a183e39c44ee26ad2e1a0f7d6c124 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Mon, 25 Nov 2024 12:33:04 -0800
Subject: [PATCH 4/8] Move helper function to utils
---
.../Transforms/Utils/LowerMathIntrinsics.h | 27 +++++++
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 59 ++--------------
llvm/lib/Transforms/Utils/CMakeLists.txt | 1 +
.../Transforms/Utils/LowerMathIntrinsics.cpp | 70 +++++++++++++++++++
.../llvm/lib/Transforms/Utils/BUILD.gn | 1 +
5 files changed, 103 insertions(+), 55 deletions(-)
create mode 100644 llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
create mode 100644 llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
new file mode 100644
index 00000000000000..6984021b01d6ca
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
@@ -0,0 +1,27 @@
+//===- llvm/Transforms/Utils/LowerMathIntrinsics.h --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower math intrinsics with a scalable vector arg to loops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOWERMATHINTRINSICS_H
+#define LLVM_TRANSFORMS_UTILS_LOWERMATHINTRINSICS_H
+
+#include <cstdint>
+#include <optional>
+
+namespace llvm {
+
+/// Lower \p CI as a loop. \p CI is a unary math intrinsic with a scalable
+/// vector argument and is deleted and replaced with a loop.
+bool lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M, CallInst *CI);
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 1a02a08508d7a8..5c8d84f6416091 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Utils/LowerMathIntrinsics.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;
@@ -335,59 +336,6 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
return Changed;
}
-static bool lowerExpIntrinsicToLoop(Module &M, CallInst *CI) {
- ScalableVectorType *ScalableTy =
- dyn_cast<ScalableVectorType>(CI->getArgOperand(0)->getType());
- if (!ScalableTy) {
- return false;
- }
-
- BasicBlock *PreLoopBB = CI->getParent();
- BasicBlock *PostLoopBB = nullptr;
- Function *ParentFunc = PreLoopBB->getParent();
- LLVMContext &Ctx = PreLoopBB->getContext();
-
- PostLoopBB = PreLoopBB->splitBasicBlock(CI);
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
- PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
-
- // loop preheader
- IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
- Value *VScale = PreLoopBuilder.CreateVScale(
- ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
- Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
- ScalableTy->getMinNumElements());
- Value *LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
-
- // loop body
- IRBuilder<> LoopBuilder(LoopBB);
- Type *Int64Ty = LoopBuilder.getInt64Ty();
-
- PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
- LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
- PHINode *Vec = LoopBuilder.CreatePHI(ScalableTy, 2);
- Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
-
- Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
- Function *Exp = Intrinsic::getOrInsertDeclaration(
- &M, CI->getIntrinsicID(), ScalableTy->getElementType());
- Value *Res = LoopBuilder.CreateCall(Exp, Elem);
- Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
- Vec->addIncoming(NewVec, LoopBB);
-
- Value *One = ConstantInt::get(Int64Ty, 1U);
- Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
- LoopIndex->addIncoming(NextLoopIndex, LoopBB);
-
- Value *ExitCond =
- LoopBuilder.CreateICmp(CmpInst::ICMP_EQ, NextLoopIndex, LoopEnd);
- LoopBuilder.CreateCondBr(ExitCond, PostLoopBB, LoopBB);
-
- CI->replaceAllUsesWith(NewVec);
- CI->eraseFromParent();
- return true;
-}
-
bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
bool Changed = false;
for (Function &F : M) {
@@ -507,8 +455,9 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
Changed |= lowerObjCCall(F, "objc_sync_exit");
break;
case Intrinsic::exp:
- Changed |= forEachCall(
- F, [&](CallInst *CI) { return lowerExpIntrinsicToLoop(M, CI); });
+ Changed |= forEachCall(F, [&](CallInst *CI) {
+ return lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(M, CI);
+ });
break;
}
}
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 65bd3080662c4d..414487215c5391 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_component_library(LLVMTransformUtils
LowerGlobalDtors.cpp
LowerIFunc.cpp
LowerInvoke.cpp
+ LowerMathIntrinsics.cpp
LowerMemIntrinsics.cpp
LowerSwitch.cpp
MatrixUtils.cpp
diff --git a/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
new file mode 100644
index 00000000000000..bdae056017b4bd
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
@@ -0,0 +1,70 @@
+//===- LowerMathIntrinsics.cpp ---------------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerMathIntrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "lower-math-intrinsics"
+
+using namespace llvm;
+
+bool llvm::lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M,
+ CallInst *CI) {
+ ScalableVectorType *ScalableTy =
+ dyn_cast<ScalableVectorType>(CI->getArgOperand(0)->getType());
+ if (!ScalableTy) {
+ return false;
+ }
+
+ BasicBlock *PreLoopBB = CI->getParent();
+ BasicBlock *PostLoopBB = nullptr;
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ PostLoopBB = PreLoopBB->splitBasicBlock(CI);
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
+ PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
+
+ // loop preheader
+ IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
+ Value *VScale = PreLoopBuilder.CreateVScale(
+ ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
+ Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+ ScalableTy->getMinNumElements());
+ Value *LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
+
+ // loop body
+ IRBuilder<> LoopBuilder(LoopBB);
+ Type *Int64Ty = LoopBuilder.getInt64Ty();
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
+ LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
+ PHINode *Vec = LoopBuilder.CreatePHI(ScalableTy, 2);
+ Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
+
+ Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
+ Function *Exp = Intrinsic::getOrInsertDeclaration(
+ &M, CI->getIntrinsicID(), ScalableTy->getElementType());
+ Value *Res = LoopBuilder.CreateCall(Exp, Elem);
+ Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
+ Vec->addIncoming(NewVec, LoopBB);
+
+ Value *One = ConstantInt::get(Int64Ty, 1U);
+ Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
+ LoopIndex->addIncoming(NextLoopIndex, LoopBB);
+
+ Value *ExitCond =
+ LoopBuilder.CreateICmp(CmpInst::ICMP_EQ, NextLoopIndex, LoopEnd);
+ LoopBuilder.CreateCondBr(ExitCond, PostLoopBB, LoopBB);
+
+ CI->replaceAllUsesWith(NewVec);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
index 1479e1c355d957..bd216fbe9c467d 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
@@ -62,6 +62,7 @@ static_library("Utils") {
"LowerGlobalDtors.cpp",
"LowerIFunc.cpp",
"LowerInvoke.cpp",
+ "LowerMathIntrinsics.cpp",
"LowerMemIntrinsics.cpp",
"LowerSwitch.cpp",
"MatrixUtils.cpp",
From b6019b1ddb6cf2c53ff70410958b8de1f42bd872 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Tue, 3 Dec 2024 09:01:39 -0800
Subject: [PATCH 5/8] Fix build error
---
llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
index 6984021b01d6ca..b7a5d2e2a2f635 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
@@ -13,6 +13,8 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOWERMATHINTRINSICS_H
#define LLVM_TRANSFORMS_UTILS_LOWERMATHINTRINSICS_H
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include <cstdint>
#include <optional>
From a60fb938fcf7fe8e9338f61755c01910a93edc94 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Wed, 4 Dec 2024 07:59:06 -0800
Subject: [PATCH 6/8] Cleanup header and comments
---
llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h | 5 +++--
llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp | 6 +++---
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
index b7a5d2e2a2f635..4ac26cb460cf71 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
@@ -13,13 +13,14 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOWERMATHINTRINSICS_H
#define LLVM_TRANSFORMS_UTILS_LOWERMATHINTRINSICS_H
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include <cstdint>
#include <optional>
namespace llvm {
+class CallInst;
+class Module;
+
/// Lower \p CI as a loop. \p CI is a unary math intrinsic with a scalable
/// vector argument and is deleted and replaced with a loop.
bool lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M, CallInst *CI);
diff --git a/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
index bdae056017b4bd..b2bff9cde42479 100644
--- a/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
@@ -1,4 +1,4 @@
-//===- LowerMathIntrinsics.cpp ---------------------------------*- C++ -*--===//
+//===- LowerMathIntrinsics.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -32,7 +32,7 @@ bool llvm::lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M,
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
- // loop preheader
+ // Loop preheader
IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
Value *VScale = PreLoopBuilder.CreateVScale(
ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
@@ -40,7 +40,7 @@ bool llvm::lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M,
ScalableTy->getMinNumElements());
Value *LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
- // loop body
+ // Loop body
IRBuilder<> LoopBuilder(LoopBB);
Type *Int64Ty = LoopBuilder.getInt64Ty();
From 33bcf55bc0f6aba773f3239e9249ed548df691a0 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Wed, 4 Dec 2024 08:01:06 -0800
Subject: [PATCH 7/8] Create AArch64 subdir in PreISelIntrinsicLowering tests and move expand-exp.ll there
---
.../PreISelIntrinsicLowering/{ => AArch64}/expand-exp.ll | 0
.../Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg | 2 ++
2 files changed, 2 insertions(+)
rename llvm/test/Transforms/PreISelIntrinsicLowering/{ => AArch64}/expand-exp.ll (100%)
create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
similarity index 100%
rename from llvm/test/Transforms/PreISelIntrinsicLowering/expand-exp.ll
rename to llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
new file mode 100644
index 00000000000000..10d4a0e953ed47
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AArch64" in config.root.targets:
+ config.unsupported = True
From 24a2204a1e899737168913bb05a9314267b8a83d Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Wed, 4 Dec 2024 11:14:31 -0800
Subject: [PATCH 8/8] Handle fixed vector type as well
---
.../Transforms/Utils/LowerMathIntrinsics.h | 6 ++--
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 6 +++-
.../Transforms/Utils/LowerMathIntrinsics.cpp | 34 +++++++++++--------
3 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
index 4ac26cb460cf71..56f0e1e9719b5a 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMathIntrinsics.h
@@ -21,9 +21,9 @@ namespace llvm {
class CallInst;
class Module;
-/// Lower \p CI as a loop. \p CI is a unary math intrinsic with a scalable
-/// vector argument and is deleted and replaced with a loop.
-bool lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M, CallInst *CI);
+/// Lower \p CI as a loop. \p CI is a unary math intrinsic with a vector
+/// argument and is deleted and replaced with a loop.
+bool lowerUnaryMathIntrinsicWithVecArgAsLoop(Module &M, CallInst *CI);
} // namespace llvm
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 5c8d84f6416091..046f4f9952d1b8 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -456,7 +456,11 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
break;
case Intrinsic::exp:
Changed |= forEachCall(F, [&](CallInst *CI) {
- return lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(M, CI);
+ // TODO: Check legality
+ if (!CI->getArgOperand(0)->getType()->isVectorTy()) {
+ return false;
+ }
+ return lowerUnaryMathIntrinsicWithVecArgAsLoop(M, CI);
});
break;
}
diff --git a/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
index b2bff9cde42479..85db97b2c7f1ad 100644
--- a/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMathIntrinsics.cpp
@@ -15,13 +15,9 @@
using namespace llvm;
-bool llvm::lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M,
- CallInst *CI) {
- ScalableVectorType *ScalableTy =
- dyn_cast<ScalableVectorType>(CI->getArgOperand(0)->getType());
- if (!ScalableTy) {
- return false;
- }
+bool llvm::lowerUnaryMathIntrinsicWithVecArgAsLoop(Module &M, CallInst *CI) {
+ Type *ArgTy = CI->getArgOperand(0)->getType();
+ VectorType *VecTy = cast<VectorType>(ArgTy);
BasicBlock *PreLoopBB = CI->getParent();
BasicBlock *PostLoopBB = nullptr;
@@ -34,11 +30,19 @@ bool llvm::lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M,
// Loop preheader
IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
- Value *VScale = PreLoopBuilder.CreateVScale(
- ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
- Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
- ScalableTy->getMinNumElements());
- Value *LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
+ Value *LoopEnd = nullptr;
+ if (VecTy->isScalableTy()) {
+ ScalableVectorType *ScalableVecTy = cast<ScalableVectorType>(VecTy);
+ Value *VScale = PreLoopBuilder.CreateVScale(
+ ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
+ Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+ ScalableVecTy->getMinNumElements());
+ LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
+ } else {
+ FixedVectorType *FixedVecTy = cast<FixedVectorType>(VecTy);
+ LoopEnd = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+ FixedVecTy->getNumElements());
+ }
// Loop body
IRBuilder<> LoopBuilder(LoopBB);
@@ -46,12 +50,12 @@ bool llvm::lowerUnaryMathIntrinsicWithScalableVecArgAsLoop(Module &M,
PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
- PHINode *Vec = LoopBuilder.CreatePHI(ScalableTy, 2);
+ PHINode *Vec = LoopBuilder.CreatePHI(VecTy, 2);
Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
- Function *Exp = Intrinsic::getOrInsertDeclaration(
- &M, CI->getIntrinsicID(), ScalableTy->getElementType());
+ Function *Exp = Intrinsic::getOrInsertDeclaration(&M, CI->getIntrinsicID(),
+ VecTy->getElementType());
Value *Res = LoopBuilder.CreateCall(Exp, Elem);
Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
Vec->addIncoming(NewVec, LoopBB);
More information about the llvm-commits mailing list