[llvm] PreISelIntrinsicLowering: Lower llvm.exp/llvm.exp2 to a loop if scalable vec arg (PR #117568)
Stephen Long via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 08:58:33 PST 2025
https://github.com/steplong updated https://github.com/llvm/llvm-project/pull/117568
>From bac7223371d82a7006ac1f932ed3b7a7d92ffbf2 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Fri, 22 Nov 2024 07:32:17 -0800
Subject: [PATCH 1/4] PreISelIntrinsicLowering: Lower llvm.exp/llvm.exp2 to a
loop if scalable vec arg
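If the argument of a call to the llvm.exp or llvm.exp2 intrinsic is a
scalable vector and the target marks the corresponding operation
(ISD::FEXP / ISD::FEXP2) as Expand for that type, lower the call into a
loop over the vector elements in PreISelIntrinsicLowering. If the
argument is a fixed vector, let SelectionDAG handle it.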
---
.../include/llvm/Analysis/TargetLibraryInfo.h | 16 ++++
.../Transforms/Utils/LowerVectorIntrinsics.h | 30 ++++++++
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 14 ++++
llvm/lib/Transforms/Utils/CMakeLists.txt | 1 +
.../Utils/LowerVectorIntrinsics.cpp | 73 +++++++++++++++++++
.../AArch64/expand-exp.ll | 43 +++++++++++
.../AArch64/lit.local.cfg | 2 +
.../llvm/lib/Transforms/Utils/BUILD.gn | 1 +
8 files changed, 180 insertions(+)
create mode 100644 llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h
create mode 100644 llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp
create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index f51d2bb9d50a21..aa6dc105bf8bed 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -10,6 +10,7 @@
#define LLVM_ANALYSIS_TARGETLIBRARYINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
@@ -457,6 +458,21 @@ class TargetLibraryInfo {
return Impl->CustomNames.find(F)->second;
}
+ static unsigned getISDNode(Intrinsic::ID ID) {
+ unsigned Node;
+ switch (ID) {
+ case Intrinsic::exp:
+ Node = ISD::FEXP;
+ break;
+ case Intrinsic::exp2:
+ Node = ISD::FEXP2;
+ break;
+ default:
+ llvm_unreachable("Intrinsic ID not supported yet");
+ }
+ return Node;
+ }
+
static void initExtensionsForTriple(bool &ShouldExtI32Param,
bool &ShouldExtI32Return,
bool &ShouldSignExtI32Param,
diff --git a/llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h
new file mode 100644
index 00000000000000..cb48bb01e178a2
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h
@@ -0,0 +1,30 @@
+//===- llvm/Transforms/Utils/LowerVectorIntrinsics.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower intrinsics with a scalable vector arg to loops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H
+#define LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H
+
+#include <cstdint>
+#include <optional>
+
+namespace llvm {
+
+class CallInst;
+class Module;
+
+/// Lower \p CI as a loop. \p CI is a unary intrinsic with a vector argument and
+/// is deleted and replaced with a loop.
+bool lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI);
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 4a3d1673c2a7c1..cc1ba4eb69ddb9 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
using namespace llvm;
@@ -453,6 +454,19 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
case Intrinsic::objc_sync_exit:
Changed |= lowerObjCCall(F, "objc_sync_exit");
break;
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ Changed |= forEachCall(F, [&](CallInst *CI) {
+ Type *Ty = CI->getArgOperand(0)->getType();
+ if (!isa<ScalableVectorType>(Ty))
+ return false;
+ const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ unsigned Op = TargetLibraryInfo::getISDNode(F.getIntrinsicID());
+ if (!TL->isOperationExpand(Op, EVT::getEVT(Ty)))
+ return false;
+ return lowerUnaryVectorIntrinsicAsLoop(M, CI);
+ });
+ break;
}
}
return Changed;
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 65bd3080662c4d..78cad0d253be8d 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -56,6 +56,7 @@ add_llvm_component_library(LLVMTransformUtils
LowerInvoke.cpp
LowerMemIntrinsics.cpp
LowerSwitch.cpp
+ LowerVectorIntrinsics.cpp
MatrixUtils.cpp
MemoryOpRemark.cpp
MemoryTaggingSupport.cpp
diff --git a/llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp
new file mode 100644
index 00000000000000..cd716deec14f56
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp
@@ -0,0 +1,73 @@
+//===- LowerVectorIntrinsics.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "lower-vector-intrinsics"
+
+using namespace llvm;
+
+bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
+ Type *ArgTy = CI->getArgOperand(0)->getType();
+ VectorType *VecTy = cast<VectorType>(ArgTy);
+
+ BasicBlock *PreLoopBB = CI->getParent();
+ BasicBlock *PostLoopBB = nullptr;
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ PostLoopBB = PreLoopBB->splitBasicBlock(CI);
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
+ PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
+
+ // Loop preheader
+ IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
+ Value *LoopEnd = nullptr;
+ if (auto *ScalableVecTy = dyn_cast<ScalableVectorType>(VecTy)) {
+ Value *VScale = PreLoopBuilder.CreateVScale(
+ ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
+ Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+ ScalableVecTy->getMinNumElements());
+ LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
+ } else {
+ FixedVectorType *FixedVecTy = cast<FixedVectorType>(VecTy);
+ LoopEnd = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+ FixedVecTy->getNumElements());
+ }
+
+ // Loop body
+ IRBuilder<> LoopBuilder(LoopBB);
+ Type *Int64Ty = LoopBuilder.getInt64Ty();
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
+ LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
+ PHINode *Vec = LoopBuilder.CreatePHI(VecTy, 2);
+ Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
+
+ Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
+ Function *Exp = Intrinsic::getOrInsertDeclaration(&M, CI->getIntrinsicID(),
+ VecTy->getElementType());
+ Value *Res = LoopBuilder.CreateCall(Exp, Elem);
+ Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
+ Vec->addIncoming(NewVec, LoopBB);
+
+ Value *One = ConstantInt::get(Int64Ty, 1U);
+ Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
+ LoopIndex->addIncoming(NextLoopIndex, LoopBB);
+
+ Value *ExitCond =
+ LoopBuilder.CreateICmp(CmpInst::ICMP_EQ, NextLoopIndex, LoopEnd);
+ LoopBuilder.CreateCondBr(ExitCond, PostLoopBB, LoopBB);
+
+ CI->replaceAllUsesWith(NewVec);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
new file mode 100644
index 00000000000000..284f2ad8072fc3
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64"
+
+define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
+; CHECK-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
+; CHECK-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
+; CHECK-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
+; CHECK: [[BB11]]:
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP8]]
+;
+ %output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
+ ret <vscale x 4 x float> %output
+}
+
+define <4 x float> @fixed_vec_exp(<4 x float> %input) {
+; CHECK-LABEL: define <4 x float> @fixed_vec_exp(
+; CHECK-SAME: <4 x float> [[INPUT:%.*]]) {
+; CHECK-NEXT: [[OUTPUT:%.*]] = call <4 x float> @llvm.exp.v4f32(<4 x float> [[INPUT]])
+; CHECK-NEXT: ret <4 x float> [[OUTPUT]]
+;
+ %output = call <4 x float> @llvm.exp.v4f32(<4 x float> %input)
+ ret <4 x float> %output
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
+declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0
+
+; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
new file mode 100644
index 00000000000000..10d4a0e953ed47
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AArch64" in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
index 1479e1c355d957..b16fe19bddfd15 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
@@ -64,6 +64,7 @@ static_library("Utils") {
"LowerInvoke.cpp",
"LowerMemIntrinsics.cpp",
"LowerSwitch.cpp",
+ "LowerVectorIntrinsics.cpp",
"MatrixUtils.cpp",
"Mem2Reg.cpp",
"MemoryOpRemark.cpp",
>From 7c3ce2ad39a4ef68e1dc5b814abf41a920242b2e Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Fri, 24 Jan 2025 07:01:31 -0800
Subject: [PATCH 2/4] Move helper function to TargetLoweringBase
---
llvm/include/llvm/Analysis/TargetLibraryInfo.h | 16 ----------------
llvm/include/llvm/CodeGen/TargetLowering.h | 3 +++
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +-
llvm/lib/CodeGen/TargetLoweringBase.cpp | 10 ++++++++++
4 files changed, 14 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index aa6dc105bf8bed..f51d2bb9d50a21 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -10,7 +10,6 @@
#define LLVM_ANALYSIS_TARGETLIBRARYINFO_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
@@ -458,21 +457,6 @@ class TargetLibraryInfo {
return Impl->CustomNames.find(F)->second;
}
- static unsigned getISDNode(Intrinsic::ID ID) {
- unsigned Node;
- switch (ID) {
- case Intrinsic::exp:
- Node = ISD::FEXP;
- break;
- case Intrinsic::exp2:
- Node = ISD::FEXP2;
- break;
- default:
- llvm_unreachable("Intrinsic ID not supported yet");
- }
- return Node;
- }
-
static void initExtensionsForTriple(bool &ShouldExtI32Param,
bool &ShouldExtI32Return,
bool &ShouldSignExtI32Param,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 38ac90f0c081b3..477ecf822736e7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2123,6 +2123,9 @@ class TargetLoweringBase {
/// Get the ISD node that corresponds to the Instruction class opcode.
int InstructionOpcodeToISD(unsigned Opcode) const;
+ /// Get the ISD node that corresponds to the Intrinsic ID.
+ int IntrinsicIDToISD(Intrinsic::ID ID) const;
+
/// @}
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index cc1ba4eb69ddb9..048a6a49e4cb94 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -461,7 +461,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
if (!isa<ScalableVectorType>(Ty))
return false;
const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
- unsigned Op = TargetLibraryInfo::getISDNode(F.getIntrinsicID());
+ unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID());
if (!TL->isOperationExpand(Op, EVT::getEVT(Ty)))
return false;
return lowerUnaryVectorIntrinsicAsLoop(M, CI);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 73af0a9a714074..30df2d1c9b0e53 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1841,6 +1841,16 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
llvm_unreachable("Unknown instruction type encountered!");
}
+int TargetLoweringBase::IntrinsicIDToISD(Intrinsic::ID ID) const {
+ switch (ID) {
+ case Intrinsic::exp:
+ return ISD::FEXP;
+ case Intrinsic::exp2:
+ return ISD::FEXP2;
+ }
+ llvm_unreachable("Unsupported Intrinsic ID encountered!");
+}
+
Value *
TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
bool UseTLS) const {
>From f588c726e4f0bf45146e83264a6e83fb1c43e3e9 Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Fri, 24 Jan 2025 07:29:28 -0800
Subject: [PATCH 3/4] Return ISD::DELETED_NODE in IntrinsicIDToISD()'s default
case
---
llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 30df2d1c9b0e53..9c56912aa6ba03 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1847,8 +1847,9 @@ int TargetLoweringBase::IntrinsicIDToISD(Intrinsic::ID ID) const {
return ISD::FEXP;
case Intrinsic::exp2:
return ISD::FEXP2;
+ default:
+ return ISD::DELETED_NODE;
}
- llvm_unreachable("Unsupported Intrinsic ID encountered!");
}
Value *
>From e7ab0ec91740b026553a39ff2ab8832f0ead663b Mon Sep 17 00:00:00 2001
From: Stephen Long <steplong at quicinc.com>
Date: Fri, 24 Jan 2025 08:58:02 -0800
Subject: [PATCH 4/4] Add comment about returning ISD::DELETED_NODE by default
---
llvm/include/llvm/CodeGen/TargetLowering.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 477ecf822736e7..10c36a12bcf528 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2123,7 +2123,8 @@ class TargetLoweringBase {
/// Get the ISD node that corresponds to the Instruction class opcode.
int InstructionOpcodeToISD(unsigned Opcode) const;
- /// Get the ISD node that corresponds to the Intrinsic ID.
+ /// Get the ISD node that corresponds to the Intrinsic ID. Returns
+ /// ISD::DELETED_NODE by default for an unsupported Intrinsic ID.
int IntrinsicIDToISD(Intrinsic::ID ID) const;
/// @}
More information about the llvm-commits mailing list