[llvm] [NVPTX] Add tex.grad.cube{array} intrinsics (PR #77693)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 11 08:35:30 PST 2024
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/77693
>From 99a627398e2de2d30beb496421cd82d7b9db4759 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Tue, 9 Jan 2024 22:45:30 +0000
Subject: [PATCH 1/2] [NVPTX] Add tex.grad.cube{array} intrinsics
---
llvm/include/llvm/IR/IntrinsicsNVVM.td | 41 ++++++++++++++
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 24 ++++++++
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 31 ++++++++++
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 6 ++
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 56 +++++++++++++++++++
.../Target/NVPTX/NVPTXReplaceImageHandles.cpp | 12 ++++
llvm/test/CodeGen/NVPTX/surf-tex.py | 4 +-
7 files changed, 172 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index cf50f2a59f602f0..ea6f02d1b1eb8e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -2491,6 +2491,47 @@ def int_nvvm_tex_unified_cube_array_level_v4u32_f32
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.unified.cube.array.level.v4u32.f32">;
+def int_nvvm_tex_unified_cube_grad_v4f32_f32
+ : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.grad.v4f32.f32">;
+def int_nvvm_tex_unified_cube_grad_v4s32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.grad.v4s32.f32">;
+def int_nvvm_tex_unified_cube_grad_v4u32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.grad.v4u32.f32">;
+
+def int_nvvm_tex_unified_cube_array_grad_v4f32_f32
+ : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [llvm_i64_ty, llvm_i32_ty,
+ llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.array.grad.v4f32.f32">;
+def int_nvvm_tex_unified_cube_array_grad_v4s32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_i32_ty,
+ llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.array.grad.v4s32.f32">;
+def int_nvvm_tex_unified_cube_array_grad_v4u32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_i32_ty,
+ llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.array.grad.v4u32.f32">;
+
def int_nvvm_tld4_unified_r_2d_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 7abe984b34e1970..ded2f2584014d96 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -309,6 +309,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
case NVPTXISD::TexUnifiedCubeArrayU32Float:
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
+ case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
+ case NVPTXISD::TexUnifiedCubeS32FloatGrad:
+ case NVPTXISD::TexUnifiedCubeU32FloatGrad:
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
case NVPTXISD::Tld4UnifiedB2DFloatFloat:
@@ -2763,6 +2769,24 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
case NVPTXISD::Tld4UnifiedA2DU64Float:
Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
break;
+ case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
+ break;
}
// Copy over operands
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 407cd6c0f8befe6..d5b5bf7de3330f1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1257,6 +1257,18 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::TexUnifiedCubeArrayU32Float";
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
+ return "NVPTXISD::TexUnifiedCubeFloatFloatGrad";
+ case NVPTXISD::TexUnifiedCubeS32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeS32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeU32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeU32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
+ return "NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad";
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeArrayS32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeArrayU32FloatGrad";
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
@@ -3654,6 +3666,19 @@ static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeU32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayU32FloatGrad;
+
case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
return NVPTXISD::Tld4UnifiedR2DFloatFloat;
case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
@@ -4538,6 +4563,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
@@ -4654,6 +4681,10 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 06adc0c47f051ce..18e6179b06819f9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -240,6 +240,12 @@ enum NodeType : unsigned {
TexUnifiedCubeArrayS32FloatLevel,
TexUnifiedCubeArrayU32Float,
TexUnifiedCubeArrayU32FloatLevel,
+ TexUnifiedCubeFloatFloatGrad,
+ TexUnifiedCubeS32FloatGrad,
+ TexUnifiedCubeU32FloatGrad,
+ TexUnifiedCubeArrayFloatFloatGrad,
+ TexUnifiedCubeArrayS32FloatGrad,
+ TexUnifiedCubeArrayU32FloatGrad,
Tld4UnifiedR2DFloatFloat,
Tld4UnifiedG2DFloatFloat,
Tld4UnifiedB2DFloatFloat,
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 6b062a7f39127fd..12b6fad34a562ba 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -3754,6 +3754,62 @@ defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
Int32Regs, Float32Regs>;
+class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
+
+defm TEX_UNIFIED_CUBE_F32_F32_GRAD
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32_GRAD
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32_GRAD
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
+
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
NVPTXRegClass intype, dag tex>
: NVPTXInst<(outs outtype:$v0, outtype:$v1,
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 85f75df39c0d0c3..f2515f971595bf2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1319,6 +1319,18 @@ static unsigned texRegisterToIndexOpcode(unsigned RegOC) {
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_I;
case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R:
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_I;
case NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R:
return NVPTX::TLD4_UNIFIED_R_2D_F32_F32_I;
case NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R:
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
index d63cfc521117d5a..3f9518628e6b746 100644
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -656,8 +656,8 @@ def gen_tex_tests(target, global_tex, global_sampler):
# FIXME: missing intrinsics.
# Support for tex.grad.{cube, acube} introduced in PTX ISA version
- # 4.3.
- if mipmap == "grad" and geom in ("cube", "acube"):
+ # 4.3, currently supported only in unified mode.
+ if not is_unified(target) and mipmap == "grad" and geom in ("cube", "acube"):
continue
# The instruction returns a two-element vector for destination
>From d55c30d574d8a980460c2ebebba0afc2009e8087 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Thu, 11 Jan 2024 16:34:17 +0000
Subject: [PATCH 2/2] fixup surf-tex.py to get ptxas passing
---
llvm/test/CodeGen/NVPTX/surf-tex.py | 37 ++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
index 3f9518628e6b746..13bd554cfd5dcb4 100644
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -1,12 +1,12 @@
# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
-# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
-# RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
+# RUN: llc -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
+# RUN: %if ptxas %{ llc -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
# We only need to run this second time for texture tests, because
# there is a difference between unified and non-unified intrinsics.
#
# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
-# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
+# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
# Verify that all instructions and intrinsics defined in TableGen
@@ -115,6 +115,15 @@ def get_llvm_value_type(vec, ty_ptx):
return value[vec].format(ty=ty)
+id_counter = 0
+
+
+def get_table_gen_id():
+ global id_counter
+ id_counter += 1
+ return id_counter
+
+
def gen_triple(target):
if target == "cuda":
print('target triple = "nvptx64-unknown-cuda"\n')
@@ -260,8 +269,9 @@ def gen_suld_tests(target, global_surf):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
- ;
+ ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
+ ; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
+ ; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
define void @${test_name}_global(${retty}* %ret, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
%val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
@@ -304,6 +314,7 @@ def gen_suld_tests(target, global_surf):
"reg_ret": get_ptx_vec_reg(vec, dtype),
"reg_surf": get_ptx_surface(target),
"reg_access": get_ptx_surface_access(geom),
+ "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -353,8 +364,9 @@ def gen_sust_tests(target, global_surf):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
- ;
+ ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
+ ; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
+ ; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
define void @${test_name}_global(${value}, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
@@ -408,6 +420,7 @@ def gen_sust_tests(target, global_surf):
"reg_value": get_ptx_vec_reg(vec, ctype),
"reg_surf": get_ptx_surface(target),
"reg_access": get_ptx_surface_access(geom),
+ "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -614,7 +627,9 @@ def gen_tex_tests(target, global_tex, global_sampler):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+ ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
+ ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
+ ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
define void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
@@ -698,6 +713,7 @@ def gen_tex_tests(target, global_tex, global_sampler):
"ptx_tex": get_ptx_texture(target),
"ptx_access": get_ptx_texture_access(geom, ctype),
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
+ "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -798,7 +814,9 @@ def gen_tld4_tests(target, global_tex, global_sampler):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+ ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
+ ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
+ ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
define void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
@@ -844,6 +862,7 @@ def gen_tld4_tests(target, global_tex, global_sampler):
"ptx_tex": get_ptx_texture(target),
"ptx_access": get_ptx_tld4_access(geom),
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
+ "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
More information about the llvm-commits mailing list