[llvm] [NVPTX] Improve modeling of inline PTX (PR #130675)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 13 16:48:55 PDT 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/130675
>From b2f38aae0b50454ea1938a7021df3a390be79a9d Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Fri, 7 Mar 2025 23:37:32 +0000
Subject: [PATCH 1/2] [NVPTX] Improve modeling of inline PTX
---
llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp | 28 +++++++++++
llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h | 6 +++
.../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 29 ++++++++++++
.../Target/NVPTX/NVPTXTargetTransformInfo.h | 4 ++
.../Analysis/CostModel/NVPTX/inline-asm.ll | 21 +++++++++
.../Analysis/CostModel/NVPTX/lit.local.cfg | 2 +
.../test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll | 47 +++++++++++++++++++
7 files changed, 137 insertions(+)
create mode 100644 llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll
create mode 100644 llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg
create mode 100644 llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
index 509b01213cd9c..0cc2132143af1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -115,3 +116,30 @@ ModRefInfo NVPTXAAResult::getModRefInfoMask(const MemoryLocation &Loc,
return ModRefInfo::ModRef;
}
+
+/// Compute the memory effects of \p Call for the NVPTX alias analysis.
+///
+/// Only calls whose callee is inline assembly are refined. Such a call is
+/// reported as MemoryEffects::none() when the asm has no side effects, no
+/// indirect constraint and no "{memory}" clobber. Every other call, including
+/// every call that is not inline assembly, yields MemoryEffects::unknown().
+///
+/// \param Call the call site being queried.
+/// \param AAQI the alias-analysis query state; not used by this function.
+MemoryEffects NVPTXAAResult::getMemoryEffects(const CallBase *Call,
+                                              AAQueryInfo &AAQI) {
+  // Inline assembly with no side-effect or memory clobbers should not
+  // indirectly access memory in the PTX specification.
+  if (const auto *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
+    // Volatile is translated as side-effects.
+    if (IA->hasSideEffects())
+      return MemoryEffects::unknown();
+
+    for (const InlineAsm::ConstraintInfo &Constraint : IA->ParseConstraints()) {
+      // Indirect constraints (e.g. =*m) are unsupported in inline PTX.
+      if (Constraint.isIndirect)
+        return MemoryEffects::unknown();
+
+      // Memory clobbers prevent optimization. ConstraintPrefix is a plain
+      // enumeration rather than a set of flags, so compare for equality: a
+      // bitwise test against isClobber would also match other enumerators
+      // that happen to share a bit with it.
+      if (Constraint.Type != InlineAsm::ConstraintPrefix::isClobber)
+        continue;
+      for (const std::string &Code : Constraint.Codes)
+        if (Code == "{memory}")
+          return MemoryEffects::unknown();
+    }
+    return MemoryEffects::none();
+  }
+
+  return MemoryEffects::unknown();
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
index 2d204979eb6ce..cfbf5dee3ec50 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -36,6 +36,12 @@ class NVPTXAAResult : public AAResultBase {
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool IgnoreLocals);
+
+ MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI);
+
+ /// Function-level overload: always answers MemoryEffects::unknown(),
+ /// whatever \p F is, so this analysis never refines effects per function.
+ /// NOTE(review): presumably declared so the call-site overload above does
+ /// not hide the base-class Function overload -- confirm against AAResultBase.
+ MemoryEffects getMemoryEffects(const Function *F) {
+ return MemoryEffects::unknown();
+ }
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 51c679b8ad89c..4d12d15b1d80b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -8,6 +8,7 @@
#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -483,6 +484,34 @@ NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return std::nullopt;
}
+/// Estimate the cost of \p U, special-casing calls to inline assembly.
+///
+/// A call to inline asm is costed at TCC_Basic for each piece of its asm
+/// string that looks like an instruction. Any other user is forwarded
+/// unchanged to the base implementation.
+InstructionCost
+NVPTXTTIImpl::getInstructionCost(const User *U,
+                                 ArrayRef<const Value *> Operands,
+                                 TTI::TargetCostKind CostKind) {
+  const auto *Call = dyn_cast<CallInst>(U);
+  const auto *Asm =
+      Call ? dyn_cast<InlineAsm>(Call->getCalledOperand()) : nullptr;
+  if (!Asm)
+    return BaseT::getInstructionCost(U, Operands, CostKind);
+
+  // Without this special case getCallCost() would return the number of
+  // arguments+1 as the cost, because inline asm is classified as a call in
+  // the IR. Counting the instructions embedded in the asm string is a better
+  // model.
+  const auto &AsmText = Asm->getAsmString();
+  SmallVector<StringRef, 4> Statements;
+  SplitString(AsmText, Statements, ";\n");
+
+  // This is pretty coarse, but it does a reasonably good job of identifying
+  // things that look like instructions, possibly with a predicate ("@").
+  const auto LooksLikeInstruction = [](StringRef Piece) {
+    const StringRef Trimmed = Piece.trim();
+    if (Trimmed.empty())
+      return false;
+    const char First = Trimmed[0];
+    return First == '@' || isAlpha(First) ||
+           Trimmed.find(".pragma") != StringRef::npos;
+  };
+
+  const unsigned InstCount = count_if(Statements, LooksLikeInstruction);
+  return InstCount * TargetTransformInfo::TCC_Basic;
+}
+
InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 7f69d422e8b4b..6db36e958b28c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -94,6 +94,10 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
// calls are particularly expensive in NVPTX.
unsigned getInliningThresholdMultiplier() const { return 11; }
+ InstructionCost getInstructionCost(const User *U,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind);
+
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
diff --git a/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll b/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll
new file mode 100644
index 0000000000000..600e3b5d537c9
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+; Each inline-asm call below is expected to be costed by the number of
+; instruction-like statements in its asm string: per the checks, braces and
+; the ".reg" declarations add nothing, a predicated "@%p1 exit" counts as
+; one, and the lone ".pragma" statement counts as one.
+define void @test1() {
+; CHECK-LABEL: 'test1'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.000000e+00)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"()
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+ %1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.0)
+ %2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3)
+ %3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0)
+ %4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0)
+ call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"()
+ ret void
+}
diff --git a/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg b/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg
new file mode 100644
index 0000000000000..0d37b86e1c8e6
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "NVPTX" in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll
new file mode 100644
index 0000000000000..b03fae365f264
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll
@@ -0,0 +1,47 @@
+; RUN: opt -passes=aa-eval -aa-pipeline=nvptx-aa -print-all-alias-modref-info < %s -disable-output 2>&1 \
+; RUN: | FileCheck %s --check-prefixes CHECK-ALIAS
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; An asm call marked "sideeffect" must stay conservative: it is expected to
+; be reported as Both ModRef against the pointer %0.
+;;CHECK-ALIAS-LABEL: Function: test_sideeffect
+;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> call
+define void @test_sideeffect(ptr %out) {
+entry:
+ %0 = addrspacecast ptr %out to ptr addrspace(1)
+ call void asm sideeffect "membar.gl;", ""()
+ store i32 5, ptr addrspace(1) %0, align 4
+ ret void
+}
+
+; An asm call with an indirect memory operand ("*m") and no sideeffect marker
+; is still expected to be reported as Both ModRef against the pointer %0.
+;;CHECK-ALIAS-LABEL: Function: test_indirect
+;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call
+define i32 @test_indirect(ptr %out) {
+entry:
+ %0 = addrspacecast ptr %out to ptr addrspace(1)
+ store i32 0, ptr addrspace(1) %0, align 4
+ %1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,*m"(ptr addrspace(1) elementtype(i32) %0)
+ store i32 0, ptr addrspace(1) %0, align 4
+ ret i32 %1
+}
+
+; An asm call carrying a "~{memory}" clobber and no sideeffect marker is
+; expected to be reported as Both ModRef against the pointer %0.
+;;CHECK-ALIAS-LABEL: Function: test_memory
+;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call
+define i32 @test_memory(ptr %out) {
+entry:
+ %0 = addrspacecast ptr %out to ptr addrspace(1)
+ store i32 0, ptr addrspace(1) %0, align 4
+ %1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,l,~{memory}"(ptr addrspace(1) %0)
+ store i32 0, ptr addrspace(1) %0, align 4
+ ret i32 %1
+}
+
+; An asm call with no sideeffect marker, no indirect operand and no memory
+; clobber is expected to be reported as NoModRef against the pointer %0.
+;;CHECK-ALIAS-LABEL: Function: test_no_sideeffect
+;;CHECK-ALIAS: NoModRef: Ptr: i32* %0 <-> %1 = call
+define void @test_no_sideeffect(ptr %in, ptr %out) {
+entry:
+ %0 = addrspacecast ptr %out to ptr addrspace(1)
+ %1 = call i32 asm "cvt.u32.u64 $0, $1;", "=r,l"(ptr %in)
+ store i32 %1, ptr addrspace(1) %0, align 4
+ ret void
+}
>From 76a4ec80f6d9195275a34325964108855a9c5c6b Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 13 Mar 2025 23:48:43 +0000
Subject: [PATCH 2/2] address comments
---
llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
index 0cc2132143af1..a784cc0d12a57 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
@@ -142,4 +142,4 @@ MemoryEffects NVPTXAAResult::getMemoryEffects(const CallBase *Call,
}
return MemoryEffects::unknown();
-}
\ No newline at end of file
+}
More information about the llvm-commits
mailing list