[Mlir-commits] [mlir] [MLIR][NVVM] Enable nvvm intrinsics import (PR #68843)
Ivan R. Ivanov
llvmlistbot at llvm.org
Mon Dec 11 00:07:19 PST 2023
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/68843
>From e8d7b4ca62ae7de2155d6857316087a77e7b767c Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Thu, 12 Oct 2023 10:18:30 +0900
Subject: [PATCH 01/11] Enable LLVMIR module import with nvvm intrinsics
---
mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt | 2 ++
.../LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp | 6 ++++++
2 files changed, 8 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
index 64de028c7fe40..8e41fcc05a161 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
@@ -56,6 +56,8 @@ add_mlir_dialect(NVVMOps nvvm)
add_mlir_doc(NVVMOps NVVMDialect Dialects/ -gen-dialect-doc -dialect=nvvm)
set(LLVM_TARGET_DEFINITIONS NVVMOps.td)
mlir_tablegen(NVVMConversions.inc -gen-llvmir-conversions)
+mlir_tablegen(NVVMFromLLVMIRConversions.inc -gen-intr-from-llvmir-conversions)
+mlir_tablegen(NVVMConvertibleLLVMIRIntrinsics.inc -gen-convertible-llvmir-intrinsics)
mlir_tablegen(NVVMOpsEnums.h.inc -gen-enum-decls)
mlir_tablegen(NVVMOpsEnums.cpp.inc -gen-enum-defs)
mlir_tablegen(NVVMOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=nvvm)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp
index 40d8253d822f6..d20e95754fbf5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp
@@ -13,6 +13,7 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMInterfaces.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVMIR/ModuleImport.h"
@@ -24,6 +25,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/ModRef.h"
using namespace mlir;
@@ -37,6 +39,7 @@ using namespace mlir::LLVM::detail;
static bool isConvertibleIntrinsic(llvm::Intrinsic::ID id) {
static const DenseSet<unsigned> convertibleIntrinsics = {
#include "mlir/Dialect/LLVMIR/LLVMConvertibleLLVMIRIntrinsics.inc"
+#include "mlir/Dialect/LLVMIR/NVVMConvertibleLLVMIRIntrinsics.inc"
};
return convertibleIntrinsics.contains(id);
}
@@ -46,6 +49,7 @@ static bool isConvertibleIntrinsic(llvm::Intrinsic::ID id) {
static ArrayRef<unsigned> getSupportedIntrinsicsImpl() {
static const SmallVector<unsigned> convertibleIntrinsics = {
#include "mlir/Dialect/LLVMIR/LLVMConvertibleLLVMIRIntrinsics.inc"
+#include "mlir/Dialect/LLVMIR/NVVMConvertibleLLVMIRIntrinsics.inc"
};
return convertibleIntrinsics;
}
@@ -63,6 +67,7 @@ static LogicalResult convertIntrinsicImpl(OpBuilder &odsBuilder,
SmallVector<llvm::Value *> args(inst->args());
ArrayRef<llvm::Value *> llvmOperands(args);
#include "mlir/Dialect/LLVMIR/LLVMIntrinsicFromLLVMIRConversions.inc"
+#include "mlir/Dialect/LLVMIR/NVVMFromLLVMIRConversions.inc"
}
return failure();
@@ -281,6 +286,7 @@ class LLVMDialectLLVMIRImportInterface : public LLVMImportDialectInterface {
void mlir::registerLLVMDialectImport(DialectRegistry ®istry) {
registry.insert<LLVM::LLVMDialect>();
+ registry.insert<NVVM::NVVMDialect>();
registry.addExtension(+[](MLIRContext *ctx, LLVM::LLVMDialect *dialect) {
dialect->addInterfaces<LLVMDialectLLVMIRImportInterface>();
});
>From 44509b9b8449c522375695b983be1cbcabd06682 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Thu, 12 Oct 2023 10:26:46 +0900
Subject: [PATCH 02/11] Add test
---
mlir/test/Target/LLVMIR/Import/intrinsic.ll | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index c8dcde11d93e6..3f7705ea78979 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -878,6 +878,14 @@ define float @ssa_copy(float %0) {
ret float %2
}
+; CHECK-LABEL: llvm.func @nvvm
+define void @nvvm() {
+ ; CHECK: %{{.*}} = nvvm.read.ptx.sreg.ntid.x : i32
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ ret void
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare float @llvm.fmuladd.f32(float, float, float)
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare float @llvm.fma.f32(float, float, float)
>From 3b8077c3a0e5eb943d6673e41b2837f7402615a8 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Tue, 7 Nov 2023 14:43:47 +0900
Subject: [PATCH 03/11] Add NVVM Translation Interface
---
mlir/include/mlir/Target/LLVMIR/Dialect/All.h | 2 +
.../Dialect/NVVM/LLVMIRToNVVMTranslation.h | 31 +++++++
mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 1 +
.../LLVMIR/LLVMIRToLLVMTranslation.cpp | 6 --
.../Target/LLVMIR/Dialect/NVVM/CMakeLists.txt | 18 ++++
.../Dialect/NVVM/LLVMIRToNVVMTranslation.cpp | 93 +++++++++++++++++++
6 files changed, 145 insertions(+), 6 deletions(-)
create mode 100644 mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h
create mode 100644 mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp
diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h
index 5dfc15afb7593..0b37e23e45118 100644
--- a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h
+++ b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h
@@ -22,6 +22,7 @@
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
@@ -74,6 +75,7 @@ registerAllGPUToLLVMIRTranslations(DialectRegistry ®istry) {
static inline void
registerAllFromLLVMIRTranslations(DialectRegistry ®istry) {
registerLLVMDialectImport(registry);
+ registerNVVMDialectImport(registry);
}
} // namespace mlir
diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h
new file mode 100644
index 0000000000000..ed17556fb77f3
--- /dev/null
+++ b/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h
@@ -0,0 +1,31 @@
+//===- LLVMIRToNVVMTranslation.h - LLVM IR to NVVM Dialect ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides registration calls for LLVM IR to NVVM dialect translation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVMIR_DIALECT_LLVMIR_LLVMIRTONVVMTRANSLATION_H
+#define MLIR_TARGET_LLVMIR_DIALECT_LLVMIR_LLVMIRTONVVMTRANSLATION_H
+
+namespace mlir {
+
+class DialectRegistry;
+class MLIRContext;
+
+/// Registers the NVVM dialect and its import from LLVM IR in the given
+/// registry.
+void registerNVVMDialectImport(DialectRegistry ®istry);
+
+/// Registers the NVVM dialect and its import from LLVM IR with the given
+/// context.
+void registerNVVMDialectImport(MLIRContext &context);
+
+} // namespace mlir
+
+#endif
diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
index c521d76a42995..dbd6985a829a0 100644
--- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Target/LLVMIR/Dialect/All.h"
#include "mlir/Target/LLVMIR/Import.h"
diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp
index d20e95754fbf5..40d8253d822f6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp
@@ -13,7 +13,6 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMInterfaces.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVMIR/ModuleImport.h"
@@ -25,7 +24,6 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/ModRef.h"
using namespace mlir;
@@ -39,7 +37,6 @@ using namespace mlir::LLVM::detail;
static bool isConvertibleIntrinsic(llvm::Intrinsic::ID id) {
static const DenseSet<unsigned> convertibleIntrinsics = {
#include "mlir/Dialect/LLVMIR/LLVMConvertibleLLVMIRIntrinsics.inc"
-#include "mlir/Dialect/LLVMIR/NVVMConvertibleLLVMIRIntrinsics.inc"
};
return convertibleIntrinsics.contains(id);
}
@@ -49,7 +46,6 @@ static bool isConvertibleIntrinsic(llvm::Intrinsic::ID id) {
static ArrayRef<unsigned> getSupportedIntrinsicsImpl() {
static const SmallVector<unsigned> convertibleIntrinsics = {
#include "mlir/Dialect/LLVMIR/LLVMConvertibleLLVMIRIntrinsics.inc"
-#include "mlir/Dialect/LLVMIR/NVVMConvertibleLLVMIRIntrinsics.inc"
};
return convertibleIntrinsics;
}
@@ -67,7 +63,6 @@ static LogicalResult convertIntrinsicImpl(OpBuilder &odsBuilder,
SmallVector<llvm::Value *> args(inst->args());
ArrayRef<llvm::Value *> llvmOperands(args);
#include "mlir/Dialect/LLVMIR/LLVMIntrinsicFromLLVMIRConversions.inc"
-#include "mlir/Dialect/LLVMIR/NVVMFromLLVMIRConversions.inc"
}
return failure();
@@ -286,7 +281,6 @@ class LLVMDialectLLVMIRImportInterface : public LLVMImportDialectInterface {
void mlir::registerLLVMDialectImport(DialectRegistry ®istry) {
registry.insert<LLVM::LLVMDialect>();
- registry.insert<NVVM::NVVMDialect>();
registry.addExtension(+[](MLIRContext *ctx, LLVM::LLVMDialect *dialect) {
dialect->addInterfaces<LLVMDialectLLVMIRImportInterface>();
});
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/NVVM/CMakeLists.txt
index 9f3935b0c3f47..a90de15798161 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/CMakeLists.txt
@@ -1,3 +1,21 @@
+set(LLVM_OPTIONAL_SOURCES
+ LLVMIRToNVVMTranslation.cpp
+ NVVMToLLVMIRTranslation.cpp
+ )
+
+add_mlir_translation_library(MLIRLLVMIRToNVVMTranslation
+ LLVMIRToNVVMTranslation.cpp
+
+ LINK_COMPONENTS
+ Core
+
+ LINK_LIBS PUBLIC
+ MLIRIR
+ MLIRNVVMDialect
+ MLIRSupport
+ MLIRTargetLLVMIRImport
+ )
+
add_mlir_translation_library(MLIRNVVMToLLVMIRTranslation
NVVMToLLVMIRTranslation.cpp
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp
new file mode 100644
index 0000000000000..855abc12a909e
--- /dev/null
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp
@@ -0,0 +1,93 @@
+//===- LLVMIRToNVVMTranslation.cpp - Translate LLVM IR to NVVM dialect ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a translation between LLVM IR and the MLIR NVVM dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/Target/LLVMIR/ModuleImport.h"
+
+#include "llvm/IR/IntrinsicsNVPTX.h"
+
+using namespace mlir;
+using namespace mlir::NVVM;
+
+/// Returns true if the LLVM IR intrinsic is convertible to an MLIR NVVM dialect
+/// intrinsic. Returns false otherwise.
+static bool isConvertibleIntrinsic(llvm::Intrinsic::ID id) {
+ static const DenseSet<unsigned> convertibleIntrinsics = {
+#include "mlir/Dialect/LLVMIR/NVVMConvertibleLLVMIRIntrinsics.inc"
+ };
+ return convertibleIntrinsics.contains(id);
+}
+
+/// Returns the list of LLVM IR intrinsic identifiers that are convertible to
+/// MLIR NVVM dialect intrinsics.
+static ArrayRef<unsigned> getSupportedIntrinsicsImpl() {
+ static const SmallVector<unsigned> convertibleIntrinsics = {
+#include "mlir/Dialect/LLVMIR/NVVMConvertibleLLVMIRIntrinsics.inc"
+ };
+ return convertibleIntrinsics;
+}
+
+/// Converts the LLVM intrinsic to an MLIR NVVM dialect operation if a
+/// conversion exits. Returns failure otherwise.
+static LogicalResult convertIntrinsicImpl(OpBuilder &odsBuilder,
+ llvm::CallInst *inst,
+ LLVM::ModuleImport &moduleImport) {
+ llvm::Intrinsic::ID intrinsicID = inst->getIntrinsicID();
+
+ // Check if the intrinsic is convertible to an MLIR dialect counterpart and
+ // copy the arguments to an an LLVM operands array reference for conversion.
+ if (isConvertibleIntrinsic(intrinsicID)) {
+ SmallVector<llvm::Value *> args(inst->args());
+ ArrayRef<llvm::Value *> llvmOperands(args);
+#include "mlir/Dialect/LLVMIR/NVVMFromLLVMIRConversions.inc"
+ }
+
+ return failure();
+}
+
+namespace {
+
+/// Implementation of the dialect interface that converts operations belonging
+/// to the NVVM dialect.
+class NVVMDialectLLVMIRImportInterface : public LLVMImportDialectInterface {
+public:
+ using LLVMImportDialectInterface::LLVMImportDialectInterface;
+
+ /// Converts the LLVM intrinsic to an MLIR NVVM dialect operation if a
+ /// conversion exits. Returns failure otherwise.
+ LogicalResult convertIntrinsic(OpBuilder &builder, llvm::CallInst *inst,
+ LLVM::ModuleImport &moduleImport) const final {
+ return convertIntrinsicImpl(builder, inst, moduleImport);
+ }
+
+ /// Returns the list of LLVM IR intrinsic identifiers that are convertible to
+ /// MLIR NVVM dialect intrinsics.
+ ArrayRef<unsigned> getSupportedIntrinsics() const final {
+ return getSupportedIntrinsicsImpl();
+ }
+};
+
+} // namespace
+
+void mlir::registerNVVMDialectImport(DialectRegistry ®istry) {
+ registry.insert<NVVM::NVVMDialect>();
+ registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
+ dialect->addInterfaces<NVVMDialectLLVMIRImportInterface>();
+ });
+}
+
+void mlir::registerNVVMDialectImport(MLIRContext &context) {
+ DialectRegistry registry;
+ registerNVVMDialectImport(registry);
+ context.appendDialectRegistry(registry);
+}
>From de7cf253740416356fa650c46a4de81c8ee780ce Mon Sep 17 00:00:00 2001
From: "Ivan R. Ivanov" <ivanov.i.aa at m.titech.ac.jp>
Date: Tue, 7 Nov 2023 18:38:36 -0800
Subject: [PATCH 04/11] Apply suggestions from code review
Co-authored-by: Tobias Gysi <tobias.gysi at nextsilicon.com>
---
.../Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h
index ed17556fb77f3..02ee83284dd33 100644
--- a/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MLIR_TARGET_LLVMIR_DIALECT_LLVMIR_LLVMIRTONVVMTRANSLATION_H
-#define MLIR_TARGET_LLVMIR_DIALECT_LLVMIR_LLVMIRTONVVMTRANSLATION_H
+#ifndef MLIR_TARGET_LLVMIR_DIALECT_NVVM_LLVMIRTONVVMTRANSLATION_H
+#define MLIR_TARGET_LLVMIR_DIALECT_NVVM_LLVMIRTONVVMTRANSLATION_H
namespace mlir {
@@ -28,4 +28,4 @@ void registerNVVMDialectImport(MLIRContext &context);
} // namespace mlir
-#endif
+#endif // MLIR_TARGET_LLVMIR_DIALECT_NVVM_LLVMIRTONVVMTRANSLATION_H
>From 38472807c43ce3ab955666fd38763581af22f1c4 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Tue, 21 Nov 2023 06:17:06 +0900
Subject: [PATCH 05/11] Add test
---
mlir/test/Target/LLVMIR/Import/nvvmir.ll | 397 +++++++++++++++++++++++
1 file changed, 397 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/Import/nvvmir.ll
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
new file mode 100644
index 0000000000000..cfe4d3862a898
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
@@ -0,0 +1,397 @@
+; RUN: mlir-translate -import-llvm %s | FileCheck %s
+
+define i32 @nvvm_special_regs() {
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ %7 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ %8 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+ %9 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+ %10 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+ %11 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+ %12 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+ %13 = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ %14 = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
+ %15 = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+ %16 = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+ %17 = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+ %18 = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+ %19 = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+ %20 = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+ %21 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+ %22 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+ %23 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+ %24 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+ %25 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+ %26 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+ %27 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+ %28 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+ ret i32 %1
+}
+
+define float @nvvm_rcp(float %0) {
+ %2 = call float @llvm.nvvm.rcp.approx.ftz.f(float %0)
+ ret float %2
+}
+
+define void @llvm_nvvm_barrier0() {
+ call void @llvm.nvvm.barrier0()
+ ret void
+}
+
+define i32 @nvvm_shfl(i32 %0, i32 %1, i32 %2, i32 %3, float %4) {
+ %6 = call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+ %7 = call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %0, float %4, i32 %1, i32 %2)
+ %8 = call i32 @llvm.nvvm.shfl.sync.up.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+ %9 = call float @llvm.nvvm.shfl.sync.up.f32(i32 %0, float %4, i32 %1, i32 %2)
+ %10 = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+ %11 = call float @llvm.nvvm.shfl.sync.down.f32(i32 %0, float %4, i32 %1, i32 %2)
+ %12 = call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+ %13 = call float @llvm.nvvm.shfl.sync.idx.f32(i32 %0, float %4, i32 %1, i32 %2)
+ ret i32 %6
+}
+
+define { i32, i1 } @nvvm_shfl_pred(i32 %0, i32 %1, i32 %2, i32 %3, float %4) {
+ %6 = call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+ %7 = call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %0, float %4, i32 %1, i32 %2)
+ %8 = call { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+ %9 = call { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32 %0, float %4, i32 %1, i32 %2)
+ %10 = call { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+ %11 = call { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32 %0, float %4, i32 %1, i32 %2)
+ %12 = call { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+ %13 = call { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32 %0, float %4, i32 %1, i32 %2)
+ ret { i32, i1 } %6
+}
+
+define i32 @nvvm_vote(i32 %0, i1 %1) {
+ %3 = call i32 @llvm.nvvm.vote.ballot.sync(i32 %0, i1 %1)
+ ret i32 %3
+}
+
+define { float, float, float, float, float, float, float, float } @nvvm_mma_mn8n8k4_row_col_f32_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11) {
+ %13 = call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11)
+ ret { float, float, float, float, float, float, float, float } %13
+}
+
+define { <2 x half>, <2 x half> } @nvvm_mma_m16n8k16_f16_f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7) {
+ %9 = call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7)
+ ret { <2 x half>, <2 x half> } %9
+}
+
+define { float, float, float, float } @nvvm_mma_m16n8k16_f32_f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7) {
+ %9 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7)
+ ret { float, float, float, float } %9
+}
+
+define { <2 x half>, <2 x half> } @nvvm_mma_m16n8k16_f16_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9) {
+ %11 = call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9)
+ ret { <2 x half>, <2 x half> } %11
+}
+
+define { float, float, float, float } @nvvm_mma_m16n8k16_f32_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9) {
+ %11 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9)
+ ret { float, float, float, float } %11
+}
+
+define { i32, i32, i32, i32 } @nvvm_mma_m16n8k16_s8_s8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+ %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+ ret { i32, i32, i32, i32 } %8
+}
+
+define { i32, i32, i32, i32 } @nvvm_mma_m16n8k16_s8_u8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+ %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+ ret { i32, i32, i32, i32 } %8
+}
+
+define { i32, i32, i32, i32 } @nvvm_mma_m16n8k128_b1_b1(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+ %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+ ret { i32, i32, i32, i32 } %8
+}
+
+define { i32, i32, i32, i32 } @nvvm_mma_m16n8k32_s4_s4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+ %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+ ret { i32, i32, i32, i32 } %8
+}
+
+define { double, double } @nvvm_mma_m8n8k4_f64_f64(double %0, double %1, double %2, double %3) {
+ %5 = call { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64(double %0, double %1, double %2, double %3)
+ ret { double, double } %5
+}
+
+define { float, float, float, float } @nvvm_mma_m16n8k4_tf32_f32(i32 %0, i32 %1, i32 %2, float %3, float %4, float %5, float %6) {
+ %8 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k4.row.col.tf32(i32 %0, i32 %1, i32 %2, float %3, float %4, float %5, float %6)
+ ret { float, float, float, float } %8
+}
+
+define void @gpu_wmma_load_op(ptr addrspace(3) %0, i32 %1) {
+ %3 = call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3(ptr addrspace(3) %0, i32 %1)
+ ret void
+}
+
+define void @gpu_wmma_store_op(ptr addrspace(3) %0, i32 %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5) {
+ call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3(ptr addrspace(3) %0, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, i32 %1)
+ ret void
+}
+
+define void @gpu_wmma_mma_op(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7, <2 x half> %8, <2 x half> %9, <2 x half> %10, <2 x half> %11, <2 x half> %12, <2 x half> %13, <2 x half> %14, <2 x half> %15, <2 x half> %16, <2 x half> %17, <2 x half> %18, <2 x half> %19) {
+ %21 = call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7, <2 x half> %8, <2 x half> %9, <2 x half> %10, <2 x half> %11, <2 x half> %12, <2 x half> %13, <2 x half> %14, <2 x half> %15, <2 x half> %16, <2 x half> %17, <2 x half> %18, <2 x half> %19)
+ ret void
+}
+
+define void @nvvm_wmma_load_tf32(ptr %0, i32 %1) {
+ %3 = call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0(ptr %0, i32 %1)
+ ret void
+}
+
+define void @nvvm_wmma_mma(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15) {
+ %17 = call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15)
+ ret void
+}
+
+define void @cp_async(ptr addrspace(3) %0, ptr addrspace(1) %1) {
+ call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %0, ptr addrspace(1) %1)
+ call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %0, ptr addrspace(1) %1)
+ call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %0, ptr addrspace(1) %1)
+ call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %0, ptr addrspace(1) %1)
+ call void @llvm.nvvm.cp.async.commit.group()
+ call void @llvm.nvvm.cp.async.wait.group(i32 0)
+ ret void
+}
+
+define void @ld_matrix(ptr addrspace(3) %0) {
+ %2 = call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %0)
+ %3 = call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %0)
+ %4 = call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %0)
+ %5 = call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %0)
+ %6 = call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %0)
+ %7 = call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %0)
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.warpsize() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.laneid() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.clusterid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.clusterid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.clusterid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank() #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank() #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare float @llvm.nvvm.rcp.approx.ftz.f(float) #1
+
+; Function Attrs: convergent nocallback nounwind
+declare void @llvm.nvvm.barrier0() #2
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare float @llvm.nvvm.shfl.sync.bfly.f32(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare i32 @llvm.nvvm.shfl.sync.up.i32(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare float @llvm.nvvm.shfl.sync.up.f32(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare float @llvm.nvvm.shfl.sync.down.f32(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare float @llvm.nvvm.shfl.sync.idx.f32(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32, i32, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32, float, i32, i32) #3
+
+; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
+declare i32 @llvm.nvvm.vote.ballot.sync(i32, i1) #3
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32(<2 x half>, <2 x half>, <2 x half>, <2 x half>, float, float, float, float, float, float, float, float) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f16(<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16(<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f32(<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, float, float, float, float) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32(<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, float, float, float, float) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8(i32, i32, i32, i32, i32, i32, i32) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8(i32, i32, i32, i32, i32, i32, i32) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1(i32, i32, i32, i32, i32, i32, i32) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4(i32, i32, i32, i32, i32, i32, i32) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64(double, double, double, double) #4
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { float, float, float, float } @llvm.nvvm.mma.m16n8k4.row.col.tf32(i32, i32, i32, float, float, float, float) #4
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3(ptr addrspace(3) nocapture readonly, i32) #5
+
+; Function Attrs: nocallback nounwind memory(argmem: write)
+declare void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3(ptr addrspace(3) nocapture writeonly, <2 x half>, <2 x half>, <2 x half>, <2 x half>, i32) #6
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>) #4
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0(ptr nocapture readonly, i32) #5
+
+; Function Attrs: nocallback nounwind memory(none)
+declare { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float) #4
+
+; Function Attrs: nocallback nounwind memory(argmem: readwrite)
+declare void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) noalias writeonly, ptr addrspace(1) noalias readonly) #7
+
+; Function Attrs: nocallback nounwind memory(argmem: readwrite)
+declare void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) noalias writeonly, ptr addrspace(1) noalias readonly) #7
+
+; Function Attrs: nocallback nounwind memory(argmem: readwrite)
+declare void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) noalias writeonly, ptr addrspace(1) noalias readonly) #7
+
+; Function Attrs: nocallback nounwind memory(argmem: readwrite)
+declare void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) noalias writeonly, ptr addrspace(1) noalias readonly) #7
+
+; Function Attrs: nounwind
+declare void @llvm.nvvm.cp.async.commit.group() #8
+
+; Function Attrs: nounwind
+declare void @llvm.nvvm.cp.async.wait.group(i32 immarg) #8
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) nocapture readonly) #5
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) nocapture readonly) #5
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) nocapture readonly) #5
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) nocapture readonly) #5
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) nocapture readonly) #5
+
+; Function Attrs: nocallback nounwind memory(argmem: read)
+declare { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) nocapture readonly) #5
>From 6e91a93789d1cfd901f8d49b34fde6d770bc2761 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Tue, 21 Nov 2023 06:40:41 +0900
Subject: [PATCH 06/11] TMP
---
mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak | 130 +++++++++++++++++++
1 file changed, 130 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak b/mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak
new file mode 100644
index 0000000000000..34bb6793ee29b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak
@@ -0,0 +1,130 @@
+// CHECK-LABEL: @nvvm_special_regs
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+ call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+ call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+ call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+ call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+ call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ call i32 @llvm.nvvm.read.ptx.sreg.laneid()
+ call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x
+ call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y
+ call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z
+ call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x
+ call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y
+ call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank
+ call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank
+// CHECK-LABEL: @nvvm_rcp
+ call float @llvm.nvvm.rcp.approx.ftz.f
+// CHECK-LABEL: @llvm_nvvm_barrier0
+ call void @llvm.nvvm.barrier0()
+// CHECK-LABEL: @nvvm_shfl
+ call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call i32 @llvm.nvvm.shfl.sync.up.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call float @llvm.nvvm.shfl.sync.up.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call float @llvm.nvvm.shfl.sync.down.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call float @llvm.nvvm.shfl.sync.idx.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: @nvvm_shfl_pred
+ call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ call { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: @nvvm_vote
+ call i32 @llvm.nvvm.vote.ballot.sync(i32 %{{.*}}, i1 %{{.*}})
+// CHECK-LABEL: @nvvm_mma_mn8n8k4_row_col_f32_f32
+ call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32
+// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f16
+ call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f16
+// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f16
+ call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16
+// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f32
+ call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f32
+// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f32
+ call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32
+// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_s8
+ call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8
+// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_u8
+ call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8
+// CHECK-LABEL: @nvvm_mma_m16n8k128_b1_b1
+ call { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1
+// CHECK-LABEL: @nvvm_mma_m16n8k32_s4_s4
+ call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4
+// CHECK-LABEL: @nvvm_mma_m8n8k4_f64_f64
+ call { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64
+// CHECK-LABEL: @nvvm_mma_m16n8k4_tf32_f32
+ call { float, float, float, float } @llvm.nvvm.mma.m16n8k4.row.col.tf32
+// CHECK-LABEL: @gpu_wmma_load_op
+ call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: @gpu_wmma_store_op
+ call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3(ptr addrspace(3) %{{.*}}, <2 x half> {{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: @gpu_wmma_mma_op
+ call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}})
+// CHECK-LABEL: @nvvm_wmma_load_tf32
+ call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0(ptr %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: @nvvm_wmma_mma
+ { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK-LABEL: @cp_async
+call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
+call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
+call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
+call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
+call void @llvm.nvvm.cp.async.commit.group()
+call void @llvm.nvvm.cp.async.wait.group(i32 0)
+// CHECK-LABEL: @ld_matrix
+ call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %{{.*}})
+ call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %{{.*}})
+ call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %{{.*}})
+ call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %{{.*}})
+ call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %{{.*}})
+ call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %{{.*}})
+!nvvm.annotations =
+// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
+{ptr @kernel_func, !"kernel", i32 1}
+!nvvm.annotations =
+// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
+{ptr @kernel_func, !"kernel", i32 1}
+{ptr @kernel_func, !"maxntidx", i32 1}
+{ptr @kernel_func, !"maxntidy", i32 23}
+{ptr @kernel_func, !"maxntidz", i32 32}
+!nvvm.annotations =
+// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
+{ptr @kernel_func, !"kernel", i32 1}
+{ptr @kernel_func, !"reqntidx", i32 1}
+{ptr @kernel_func, !"reqntidy", i32 23}
+{ptr @kernel_func, !"reqntidz", i32 32}
+!nvvm.annotations =
+// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
+{ptr @kernel_func, !"kernel", i32 1}
+{ptr @kernel_func, !"minctasm", i32 16}
+!nvvm.annotations =
+// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
+{ptr @kernel_func, !"kernel", i32 1}
+{ptr @kernel_func, !"maxnreg", i32 16}
+!nvvm.annotations =
+// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
+{ptr @kernel_func, !"kernel", i32 1}
+{ptr @kernel_func, !"maxnreg", i32 32}
+{ptr @kernel_func, !"maxntidx", i32 1}
+{ptr @kernel_func, !"maxntidy", i32 23}
+{ptr @kernel_func, !"maxntidz", i32 32}
+{ptr @kernel_func, !"minctasm", i32 16}
>From 4803bdf09533fcc5dab0ae049db01ae8e50393a3 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 11 Dec 2023 14:25:33 +0900
Subject: [PATCH 07/11] remove stray file
---
mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak | 130 -------------------
1 file changed, 130 deletions(-)
delete mode 100644 mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak b/mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak
deleted file mode 100644
index 34bb6793ee29b..0000000000000
--- a/mlir/test/Target/LLVMIR/Import/nvvmir.ll.bak
+++ /dev/null
@@ -1,130 +0,0 @@
-// CHECK-LABEL: @nvvm_special_regs
- %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
- call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
- call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
- call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
- call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
- call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
- call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
- call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
- call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
- call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
- call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- call i32 @llvm.nvvm.read.ptx.sreg.laneid()
- call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x
- call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y
- call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z
- call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x
- call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y
- call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank
- call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank
-// CHECK-LABEL: @nvvm_rcp
- call float @llvm.nvvm.rcp.approx.ftz.f
-// CHECK-LABEL: @llvm_nvvm_barrier0
- call void @llvm.nvvm.barrier0()
-// CHECK-LABEL: @nvvm_shfl
- call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call i32 @llvm.nvvm.shfl.sync.up.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call float @llvm.nvvm.shfl.sync.up.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call float @llvm.nvvm.shfl.sync.down.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call float @llvm.nvvm.shfl.sync.idx.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-// CHECK-LABEL: @nvvm_shfl_pred
- call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- call { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-// CHECK-LABEL: @nvvm_vote
- call i32 @llvm.nvvm.vote.ballot.sync(i32 %{{.*}}, i1 %{{.*}})
-// CHECK-LABEL: @nvvm_mma_mn8n8k4_row_col_f32_f32
- call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32
-// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f16
- call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f16
-// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f16
- call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16
-// CHECK-LABEL: @nvvm_mma_m16n8k16_f16_f32
- call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f32
-// CHECK-LABEL: @nvvm_mma_m16n8k16_f32_f32
- call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32
-// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_s8
- call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8
-// CHECK-LABEL: @nvvm_mma_m16n8k16_s8_u8
- call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8
-// CHECK-LABEL: @nvvm_mma_m16n8k128_b1_b1
- call { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1
-// CHECK-LABEL: @nvvm_mma_m16n8k32_s4_s4
- call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4
-// CHECK-LABEL: @nvvm_mma_m8n8k4_f64_f64
- call { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64
-// CHECK-LABEL: @nvvm_mma_m16n8k4_tf32_f32
- call { float, float, float, float } @llvm.nvvm.mma.m16n8k4.row.col.tf32
-// CHECK-LABEL: @gpu_wmma_load_op
- call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
-// CHECK-LABEL: @gpu_wmma_store_op
- call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3(ptr addrspace(3) %{{.*}}, <2 x half> {{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, i32 %{{.*}})
-// CHECK-LABEL: @gpu_wmma_mma_op
- call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}})
-// CHECK-LABEL: @nvvm_wmma_load_tf32
- call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0(ptr %{{.*}}, i32 %{{.*}})
-// CHECK-LABEL: @nvvm_wmma_mma
- { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
-// CHECK-LABEL: @cp_async
-call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
-call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
-call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
-call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}})
-call void @llvm.nvvm.cp.async.commit.group()
-call void @llvm.nvvm.cp.async.wait.group(i32 0)
-// CHECK-LABEL: @ld_matrix
- call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %{{.*}})
- call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %{{.*}})
- call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %{{.*}})
- call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %{{.*}})
- call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %{{.*}})
- call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %{{.*}})
-!nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-{ptr @kernel_func, !"kernel", i32 1}
-!nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-{ptr @kernel_func, !"kernel", i32 1}
-{ptr @kernel_func, !"maxntidx", i32 1}
-{ptr @kernel_func, !"maxntidy", i32 23}
-{ptr @kernel_func, !"maxntidz", i32 32}
-!nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-{ptr @kernel_func, !"kernel", i32 1}
-{ptr @kernel_func, !"reqntidx", i32 1}
-{ptr @kernel_func, !"reqntidy", i32 23}
-{ptr @kernel_func, !"reqntidz", i32 32}
-!nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-{ptr @kernel_func, !"kernel", i32 1}
-{ptr @kernel_func, !"minctasm", i32 16}
-!nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-{ptr @kernel_func, !"kernel", i32 1}
-{ptr @kernel_func, !"maxnreg", i32 16}
-!nvvm.annotations =
-// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1}
-{ptr @kernel_func, !"kernel", i32 1}
-{ptr @kernel_func, !"maxnreg", i32 32}
-{ptr @kernel_func, !"maxntidx", i32 1}
-{ptr @kernel_func, !"maxntidy", i32 23}
-{ptr @kernel_func, !"maxntidz", i32 32}
-{ptr @kernel_func, !"minctasm", i32 16}
>From 16cc248c52c919771b44c4767cf044ce2b839504 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 11 Dec 2023 14:25:50 +0900
Subject: [PATCH 08/11] Test intrinsic import
---
mlir/test/Target/LLVMIR/Import/nvvmir.ll | 299 +++++++++++++----------
1 file changed, 166 insertions(+), 133 deletions(-)
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
index cfe4d3862a898..6b29cf05e3a92 100644
--- a/mlir/test/Target/LLVMIR/Import/nvvmir.ll
+++ b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
@@ -1,175 +1,208 @@
; RUN: mlir-translate -import-llvm %s | FileCheck %s
+; CHECK-LABEL: @nvvm_special_regs
define i32 @nvvm_special_regs() {
+ ; CHECK: = nvvm.read.ptx.sreg.tid.x : i32
%1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.tid.y : i32
%2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.tid.z : i32
%3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.ntid.x : i32
%4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.ntid.y : i32
%5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.ntid.z : i32
%6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.ctaid.x : i32
%7 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.ctaid.y : i32
%8 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.ctaid.z : i32
%9 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.nctaid.x : i32
%10 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.nctaid.y : i32
%11 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.nctaid.z : i32
%12 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.warpsize : i32
%13 = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ ; CHECK: = nvvm.read.ptx.sreg.laneid : i32
%14 = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
+ ; CHECK: = nvvm.read.ptx.sreg.clusterid.x : i32
%15 = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.clusterid.y : i32
%16 = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.clusterid.z : i32
%17 = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.nclusterid.x : i32
%18 = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.nclusterid.y : i32
%19 = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.nclusterid.z : i32
%20 = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.ctaid.x : i32
%21 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.ctaid.y : i32
%22 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.ctaid.z : i32
%23 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.nctaid.x : i32
%24 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.nctaid.y : i32
%25 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.nctaid.z : i32
%26 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.ctarank : i32
%27 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+ ; CHECK: = nvvm.read.ptx.sreg.cluster.nctarank : i32
%28 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
ret i32 %1
}
+; CHECK-LABEL: @nvvm_rcp
define float @nvvm_rcp(float %0) {
+ ; CHECK: = nvvm.rcp.approx.ftz.f %{{.*}} : f32
%2 = call float @llvm.nvvm.rcp.approx.ftz.f(float %0)
ret float %2
}
-define void @llvm_nvvm_barrier0() {
- call void @llvm.nvvm.barrier0()
- ret void
-}
-
-define i32 @nvvm_shfl(i32 %0, i32 %1, i32 %2, i32 %3, float %4) {
- %6 = call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %0, i32 %3, i32 %1, i32 %2)
- %7 = call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %0, float %4, i32 %1, i32 %2)
- %8 = call i32 @llvm.nvvm.shfl.sync.up.i32(i32 %0, i32 %3, i32 %1, i32 %2)
- %9 = call float @llvm.nvvm.shfl.sync.up.f32(i32 %0, float %4, i32 %1, i32 %2)
- %10 = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %0, i32 %3, i32 %1, i32 %2)
- %11 = call float @llvm.nvvm.shfl.sync.down.f32(i32 %0, float %4, i32 %1, i32 %2)
- %12 = call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %0, i32 %3, i32 %1, i32 %2)
- %13 = call float @llvm.nvvm.shfl.sync.idx.f32(i32 %0, float %4, i32 %1, i32 %2)
- ret i32 %6
-}
-
-define { i32, i1 } @nvvm_shfl_pred(i32 %0, i32 %1, i32 %2, i32 %3, float %4) {
- %6 = call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
- %7 = call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %0, float %4, i32 %1, i32 %2)
- %8 = call { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
- %9 = call { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32 %0, float %4, i32 %1, i32 %2)
- %10 = call { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
- %11 = call { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32 %0, float %4, i32 %1, i32 %2)
- %12 = call { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
- %13 = call { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32 %0, float %4, i32 %1, i32 %2)
- ret { i32, i1 } %6
-}
-
-define i32 @nvvm_vote(i32 %0, i1 %1) {
- %3 = call i32 @llvm.nvvm.vote.ballot.sync(i32 %0, i1 %1)
- ret i32 %3
-}
-
-define { float, float, float, float, float, float, float, float } @nvvm_mma_mn8n8k4_row_col_f32_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11) {
- %13 = call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11)
- ret { float, float, float, float, float, float, float, float } %13
-}
-
-define { <2 x half>, <2 x half> } @nvvm_mma_m16n8k16_f16_f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7) {
- %9 = call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7)
- ret { <2 x half>, <2 x half> } %9
-}
-
-define { float, float, float, float } @nvvm_mma_m16n8k16_f32_f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7) {
- %9 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7)
- ret { float, float, float, float } %9
-}
-
-define { <2 x half>, <2 x half> } @nvvm_mma_m16n8k16_f16_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9) {
- %11 = call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9)
- ret { <2 x half>, <2 x half> } %11
-}
-
-define { float, float, float, float } @nvvm_mma_m16n8k16_f32_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9) {
- %11 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9)
- ret { float, float, float, float } %11
-}
-
-define { i32, i32, i32, i32 } @nvvm_mma_m16n8k16_s8_s8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
- %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
- ret { i32, i32, i32, i32 } %8
-}
-
-define { i32, i32, i32, i32 } @nvvm_mma_m16n8k16_s8_u8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
- %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
- ret { i32, i32, i32, i32 } %8
-}
-
-define { i32, i32, i32, i32 } @nvvm_mma_m16n8k128_b1_b1(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
- %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
- ret { i32, i32, i32, i32 } %8
-}
-
-define { i32, i32, i32, i32 } @nvvm_mma_m16n8k32_s4_s4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
- %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
- ret { i32, i32, i32, i32 } %8
-}
-
-define { double, double } @nvvm_mma_m8n8k4_f64_f64(double %0, double %1, double %2, double %3) {
- %5 = call { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64(double %0, double %1, double %2, double %3)
- ret { double, double } %5
-}
-
-define { float, float, float, float } @nvvm_mma_m16n8k4_tf32_f32(i32 %0, i32 %1, i32 %2, float %3, float %4, float %5, float %6) {
- %8 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k4.row.col.tf32(i32 %0, i32 %1, i32 %2, float %3, float %4, float %5, float %6)
- ret { float, float, float, float } %8
-}
-
-define void @gpu_wmma_load_op(ptr addrspace(3) %0, i32 %1) {
- %3 = call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3(ptr addrspace(3) %0, i32 %1)
- ret void
-}
-
-define void @gpu_wmma_store_op(ptr addrspace(3) %0, i32 %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5) {
- call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3(ptr addrspace(3) %0, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, i32 %1)
- ret void
-}
-
-define void @gpu_wmma_mma_op(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7, <2 x half> %8, <2 x half> %9, <2 x half> %10, <2 x half> %11, <2 x half> %12, <2 x half> %13, <2 x half> %14, <2 x half> %15, <2 x half> %16, <2 x half> %17, <2 x half> %18, <2 x half> %19) {
- %21 = call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7, <2 x half> %8, <2 x half> %9, <2 x half> %10, <2 x half> %11, <2 x half> %12, <2 x half> %13, <2 x half> %14, <2 x half> %15, <2 x half> %16, <2 x half> %17, <2 x half> %18, <2 x half> %19)
- ret void
-}
-
-define void @nvvm_wmma_load_tf32(ptr %0, i32 %1) {
- %3 = call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0(ptr %0, i32 %1)
- ret void
-}
-
-define void @nvvm_wmma_mma(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15) {
- %17 = call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15)
- ret void
-}
-
-define void @cp_async(ptr addrspace(3) %0, ptr addrspace(1) %1) {
- call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %0, ptr addrspace(1) %1)
- call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %0, ptr addrspace(1) %1)
- call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %0, ptr addrspace(1) %1)
- call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %0, ptr addrspace(1) %1)
- call void @llvm.nvvm.cp.async.commit.group()
- call void @llvm.nvvm.cp.async.wait.group(i32 0)
- ret void
-}
-
-define void @ld_matrix(ptr addrspace(3) %0) {
- %2 = call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %0)
- %3 = call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %0)
- %4 = call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %0)
- %5 = call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %0)
- %6 = call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %0)
- %7 = call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %0)
- ret void
-}
+; TODO Below intrinsics not yet supported
+
+; define void @llvm_nvvm_barrier0() {
+; call void @llvm.nvvm.barrier0()
+; ret void
+; }
+;
+; define i32 @nvvm_shfl(i32 %0, i32 %1, i32 %2, i32 %3, float %4) {
+; %6 = call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+; %7 = call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %0, float %4, i32 %1, i32 %2)
+; %8 = call i32 @llvm.nvvm.shfl.sync.up.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+; %9 = call float @llvm.nvvm.shfl.sync.up.f32(i32 %0, float %4, i32 %1, i32 %2)
+; %10 = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+; %11 = call float @llvm.nvvm.shfl.sync.down.f32(i32 %0, float %4, i32 %1, i32 %2)
+; %12 = call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %0, i32 %3, i32 %1, i32 %2)
+; %13 = call float @llvm.nvvm.shfl.sync.idx.f32(i32 %0, float %4, i32 %1, i32 %2)
+; ret i32 %6
+; }
+;
+; define { i32, i1 } @nvvm_shfl_pred(i32 %0, i32 %1, i32 %2, i32 %3, float %4) {
+; %6 = call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+; %7 = call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %0, float %4, i32 %1, i32 %2)
+; %8 = call { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+; %9 = call { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32 %0, float %4, i32 %1, i32 %2)
+; %10 = call { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+; %11 = call { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32 %0, float %4, i32 %1, i32 %2)
+; %12 = call { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32 %0, i32 %3, i32 %1, i32 %2)
+; %13 = call { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32 %0, float %4, i32 %1, i32 %2)
+; ret { i32, i1 } %6
+; }
+;
+; define i32 @nvvm_vote(i32 %0, i1 %1) {
+; %3 = call i32 @llvm.nvvm.vote.ballot.sync(i32 %0, i1 %1)
+; ret i32 %3
+; }
+;
+; define { float, float, float, float, float, float, float, float } @nvvm_mma_mn8n8k4_row_col_f32_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11) {
+; %13 = call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11)
+; ret { float, float, float, float, float, float, float, float } %13
+; }
+;
+; define { <2 x half>, <2 x half> } @nvvm_mma_m16n8k16_f16_f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7) {
+; %9 = call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7)
+; ret { <2 x half>, <2 x half> } %9
+; }
+;
+; define { float, float, float, float } @nvvm_mma_m16n8k16_f32_f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7) {
+; %9 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7)
+; ret { float, float, float, float } %9
+; }
+;
+; define { <2 x half>, <2 x half> } @nvvm_mma_m16n8k16_f16_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9) {
+; %11 = call { <2 x half>, <2 x half> } @llvm.nvvm.mma.m16n8k16.row.col.f16.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9)
+; ret { <2 x half>, <2 x half> } %11
+; }
+;
+; define { float, float, float, float } @nvvm_mma_m16n8k16_f32_f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9) {
+; %11 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k16.row.col.f32.f32(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, float %6, float %7, float %8, float %9)
+; ret { float, float, float, float } %11
+; }
+;
+; define { i32, i32, i32, i32 } @nvvm_mma_m16n8k16_s8_s8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+; %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.s8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+; ret { i32, i32, i32, i32 } %8
+; }
+;
+; define { i32, i32, i32, i32 } @nvvm_mma_m16n8k16_s8_u8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+; %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k16.row.col.satfinite.s8.u8(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+; ret { i32, i32, i32, i32 } %8
+; }
+;
+; define { i32, i32, i32, i32 } @nvvm_mma_m16n8k128_b1_b1(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+; %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.xor.popc.m16n8k128.row.col.b1(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+; ret { i32, i32, i32, i32 } %8
+; }
+;
+; define { i32, i32, i32, i32 } @nvvm_mma_m16n8k32_s4_s4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
+; %8 = call { i32, i32, i32, i32 } @llvm.nvvm.mma.m16n8k32.row.col.satfinite.s4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6)
+; ret { i32, i32, i32, i32 } %8
+; }
+;
+; define { double, double } @nvvm_mma_m8n8k4_f64_f64(double %0, double %1, double %2, double %3) {
+; %5 = call { double, double } @llvm.nvvm.mma.m8n8k4.row.col.f64(double %0, double %1, double %2, double %3)
+; ret { double, double } %5
+; }
+;
+; define { float, float, float, float } @nvvm_mma_m16n8k4_tf32_f32(i32 %0, i32 %1, i32 %2, float %3, float %4, float %5, float %6) {
+; %8 = call { float, float, float, float } @llvm.nvvm.mma.m16n8k4.row.col.tf32(i32 %0, i32 %1, i32 %2, float %3, float %4, float %5, float %6)
+; ret { float, float, float, float } %8
+; }
+;
+; define void @gpu_wmma_load_op(ptr addrspace(3) %0, i32 %1) {
+; %3 = call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3(ptr addrspace(3) %0, i32 %1)
+; ret void
+; }
+;
+; define void @gpu_wmma_store_op(ptr addrspace(3) %0, i32 %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5) {
+; call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3(ptr addrspace(3) %0, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, i32 %1)
+; ret void
+; }
+;
+; define void @gpu_wmma_mma_op(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7, <2 x half> %8, <2 x half> %9, <2 x half> %10, <2 x half> %11, <2 x half> %12, <2 x half> %13, <2 x half> %14, <2 x half> %15, <2 x half> %16, <2 x half> %17, <2 x half> %18, <2 x half> %19) {
+; %21 = call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> %0, <2 x half> %1, <2 x half> %2, <2 x half> %3, <2 x half> %4, <2 x half> %5, <2 x half> %6, <2 x half> %7, <2 x half> %8, <2 x half> %9, <2 x half> %10, <2 x half> %11, <2 x half> %12, <2 x half> %13, <2 x half> %14, <2 x half> %15, <2 x half> %16, <2 x half> %17, <2 x half> %18, <2 x half> %19)
+; ret void
+; }
+;
+; define void @nvvm_wmma_load_tf32(ptr %0, i32 %1) {
+; %3 = call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0(ptr %0, i32 %1)
+; ret void
+; }
+;
+; define void @nvvm_wmma_mma(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15) {
+; %17 = call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15)
+; ret void
+; }
+;
+; define void @cp_async(ptr addrspace(3) %0, ptr addrspace(1) %1) {
+; call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %0, ptr addrspace(1) %1)
+; call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %0, ptr addrspace(1) %1)
+; call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %0, ptr addrspace(1) %1)
+; call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %0, ptr addrspace(1) %1)
+; call void @llvm.nvvm.cp.async.commit.group()
+; call void @llvm.nvvm.cp.async.wait.group(i32 0)
+; ret void
+; }
+;
+; define void @ld_matrix(ptr addrspace(3) %0) {
+; %2 = call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %0)
+; %3 = call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %0)
+; %4 = call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %0)
+; %5 = call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %0)
+; %6 = call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %0)
+; %7 = call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %0)
+; ret void
+; }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
>From 91851d0cea2c725892297af4dd7b3d051d6db5d5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 11 Dec 2023 14:28:59 +0900
Subject: [PATCH 09/11] Remove nvvm from llvm intrisic test
---
mlir/test/Target/LLVMIR/Import/intrinsic.ll | 8 --------
1 file changed, 8 deletions(-)
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index 3f7705ea78979..c8dcde11d93e6 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -878,14 +878,6 @@ define float @ssa_copy(float %0) {
ret float %2
}
-; CHECK-LABEL: llvm.func @nvvm
-define void @nvvm() {
- ; CHECK: %{{.*}} = nvvm.read.ptx.sreg.ntid.x : i32
- %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- ret void
-}
-
-declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare float @llvm.fmuladd.f32(float, float, float)
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare float @llvm.fma.f32(float, float, float)
>From 6ec007c54bf6615898cd6cb57bad9c7f4e72c085 Mon Sep 17 00:00:00 2001
From: "Ivan R. Ivanov" <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 11 Dec 2023 17:05:30 +0900
Subject: [PATCH 10/11] Update mlir/test/Target/LLVMIR/Import/nvvmir.ll
Co-authored-by: Tobias Gysi <tobias.gysi at nextsilicon.com>
---
mlir/test/Target/LLVMIR/Import/nvvmir.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
index 6b29cf05e3a92..1f8255ddffa65 100644
--- a/mlir/test/Target/LLVMIR/Import/nvvmir.ll
+++ b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
@@ -68,7 +68,7 @@ define float @nvvm_rcp(float %0) {
ret float %2
}
-; TODO Below intrinsics not yet supported
+; TODO: Support the intrinsics below once they derive from NVVM_IntrOp rather than from NVVM_Op.
; define void @llvm_nvvm_barrier0() {
; call void @llvm.nvvm.barrier0()
>From 02a3f6f9d430216224bc53f0cc740c8fc1818572 Mon Sep 17 00:00:00 2001
From: "Ivan R. Ivanov" <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 11 Dec 2023 17:07:07 +0900
Subject: [PATCH 11/11] Update mlir/test/Target/LLVMIR/Import/nvvmir.ll
Co-authored-by: Christian Ulmann <christianulmann at gmail.com>
---
mlir/test/Target/LLVMIR/Import/nvvmir.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
index 1f8255ddffa65..6369e2dd3b517 100644
--- a/mlir/test/Target/LLVMIR/Import/nvvmir.ll
+++ b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
@@ -205,7 +205,7 @@ define float @nvvm_rcp(float %0) {
; }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.y() #0
More information about the Mlir-commits
mailing list