[llvm] [NVPTX] Improve NVVMReflect Efficiency (PR #134416)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 10:40:47 PDT 2025
================
@@ -39,33 +38,47 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/StripGCRelocates.h"
#include <algorithm>
#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
#define NVVM_REFLECT_OCL_FUNCTION "__nvvm_reflect_ocl"
using namespace llvm;
-#define DEBUG_TYPE "nvptx-reflect"
+#define DEBUG_TYPE "nvvm-reflect"
namespace llvm {
void initializeNVVMReflectPass(PassRegistry &);
}
namespace {
-class NVVMReflect : public FunctionPass {
+class NVVMReflect : public ModulePass {
+private:
+ StringMap<int> VarMap;
+ /// Process a reflect function by finding all its uses and replacing them with
+ /// appropriate constant values. For __CUDA_FTZ, uses the module flag value.
+ /// For __CUDA_ARCH, uses SmVersion * 10. For all other strings, uses 0.
+ bool handleReflectFunction(Function *F);
+ void setVarMap(Module &M);
+
public:
static char ID;
- unsigned int SmVersion;
NVVMReflect() : NVVMReflect(0) {}
- explicit NVVMReflect(unsigned int Sm) : FunctionPass(ID), SmVersion(Sm) {
+ // __CUDA_FTZ is assigned in `runOnModule` by checking nvvm-reflect-ftz module
+ // metadata.
+ explicit NVVMReflect(unsigned int Sm) : ModulePass(ID) {
+ VarMap["__CUDA_ARCH"] = Sm * 10;
----------------
AlexMaclean wrote:
I think you can set this in an initializer as well, with something like `VarMap({{"__CUDA_ARCH", Sm * 10}})`.
https://github.com/llvm/llvm-project/pull/134416
More information about the llvm-commits
mailing list