[llvm] e29ba97 - [NVPTX] Auto-generate tests for surface and texture instructions
Andrew Savonichev via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 7 04:28:50 PST 2021
Author: Andrew Savonichev
Date: 2021-12-07T15:27:51+03:00
New Revision: e29ba97d236ca8c1ccd67e801cdc762b2764ae3d
URL: https://github.com/llvm/llvm-project/commit/e29ba97d236ca8c1ccd67e801cdc762b2764ae3d
DIFF: https://github.com/llvm/llvm-project/commit/e29ba97d236ca8c1ccd67e801cdc762b2764ae3d.diff
LOG: [NVPTX] Auto-generate tests for surface and texture instructions
The patch adds lit tests for SULD, SUST, TEX and TLD4 instructions as
a follow-up to D112232. A number of FIXME marks highlight possible
bugs or missed instruction variants.
Differential Revision: https://reviews.llvm.org/D114367
Added:
llvm/test/CodeGen/NVPTX/surf-tex.py
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
new file mode 100644
index 0000000000000..7a251a24346a2
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -0,0 +1,1025 @@
+# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
+# RUN: llc %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
+
+# We only need to run this a second time for texture tests, because
+# there is a difference between unified and non-unified intrinsics.
+#
+# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
+# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
+
+# Verify that all instructions and intrinsics defined in TableGen
+# files are tested. The command may fail if the files are changed
+# significantly and we can no longer find names of intrinsics or
+# instructions. In that case we can replace this command with a
+# reference list.
+#
+# Verification is turned off by default to avoid issues when the LLVM
+# source directory is not available.
+#
+# RUN-DISABLED: %python %s --verify --gen-list=%t.list --llvm-tablegen=%S/../../../include/llvm/IR/IntrinsicsNVVM.td --inst-tablegen=%S/../../../lib/Target/NVPTX/NVPTXIntrinsics.td
+
+from __future__ import print_function
+
+import argparse
+import re
+import string
+import textwrap
+from itertools import product
+
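+# Map a PTX geometry suffix (e.g. "a2d") to the spelling used in NVVM
+# intrinsic names (e.g. "2d.array").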
+def get_llvm_geom(geom_ptx):
+ geom = {
+ "1d" : "1d",
+ "2d" : "2d",
+ "3d" : "3d",
+ "a1d" : "1d.array",
+ "a2d" : "2d.array",
+ "cube" : "cube",
+ "acube" : "cube.array"
+ }
+ return geom[geom_ptx]
+
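+# FileCheck pattern for the PTX register class that holds a value of
+# the given type.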
+def get_ptx_reg(ty):
+ reg = {
+ "b8" : "%rs{{[0-9]+}}",
+ "b16" : "%rs{{[0-9]+}}",
+ "b32" : "%r{{[0-9]+}}",
+ "b64" : "%rd{{[0-9]+}}",
+ "f32" : "%f{{[0-9]+}}",
+ "u32" : "%r{{[0-9]+}}",
+ "s32" : "%r{{[0-9]+}}"
+ }
+ return reg[ty]
+
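+# FileCheck pattern for a scalar register or a v2/v4 register tuple of
+# the given type.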
+def get_ptx_vec_reg(vec, ty):
+ vec_reg = {
+ "" : "{{{reg}}}",
+ "v2" : "{{{reg}, {reg}}}",
+ "v4" : "{{{reg}, {reg}, {reg}, {reg}}}"
+ }
+ return vec_reg[vec].format(reg=get_ptx_reg(ty))
+
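+# Map a PTX type (e.g. "u32") to the corresponding LLVM IR type.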
+def get_llvm_type(ty):
+ if ty[0] in ("b", "s", "u"):
+ return "i" + ty[1:]
+ if ty == "f16":
+ return "half"
+ if ty == "f32":
+ return "float"
+ raise RuntimeError("invalid type: " + ty)
+
+def get_llvm_vec_type(vec, ty_ptx):
+ ty = get_llvm_type(ty_ptx)
+
+ # i8 is passed as i16, same as in PTX
+ if ty == "i8":
+ ty = "i16"
+
+ vec_ty = {
+ "" : "{ty}",
+ "v2" : "{{ {ty}, {ty} }}",
+ "v4" : "{{ {ty}, {ty}, {ty}, {ty} }}"
+ }
+ return vec_ty[vec].format(ty=ty)
+
+def get_llvm_value(vec, ty_ptx):
+ ty = get_llvm_type(ty_ptx)
+
+ # i8 is passed as i16, same as in PTX
+ if ty == "i8":
+ ty = "i16"
+
+ value = {
+ "" : "{ty} %v1",
+ "v2" : "{ty} %v1, {ty} %v2",
+ "v4" : "{ty} %v1, {ty} %v2, {ty} %v3, {ty} %v4"
+ }
+ return value[vec].format(ty=ty)
+
+def get_llvm_value_type(vec, ty_ptx):
+ ty = get_llvm_type(ty_ptx)
+
+ # i8 is passed as i16, same as in PTX
+ if ty == "i8":
+ ty = "i16"
+
+ value = {
+ "" : "{ty}",
+ "v2" : "{ty}, {ty}",
+ "v4" : "{ty}, {ty}, {ty}, {ty}"
+ }
+ return value[vec].format(ty=ty)
+
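+# Print the target triple for the requested environment (cuda or nvcl).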
+def gen_triple(target):
+ if target == "cuda":
+ print("target triple = \"nvptx64-unknown-cuda\"\n")
+ elif target == "nvcl":
+ print("target triple = \"nvptx64-unknown-nvcl\"\n")
+ else:
+ raise RuntimeError("invalid target: " + target)
+
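+# Emit the global surface and texture handles (plus a sampler handle for
+# non-unified targets) together with their CHECK lines, and return the
+# !nvvm.annotations entries that describe them.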
+def gen_globals(target, surf_name, tex_name, sampler_name):
+ print("declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)")
+ print("; CHECK: .global .surfref {}".format(surf_name))
+ print("; CHECK: .global .texref {}".format(tex_name))
+ print("@{} = internal addrspace(1) global i64 0, align 8".format(surf_name))
+ print("@{} = internal addrspace(1) global i64 1, align 8".format(tex_name))
+ generated_metadata = [
+ "!{{i64 addrspace(1)* @{}, !\"surface\", i32 1}}".format(surf_name),
+ "!{{i64 addrspace(1)* @{}, !\"texture\", i32 1}}".format(tex_name),
+ ]
+
+ if not is_unified(target):
+ print("; CHECK: .global .samplerref {}".format(sampler_name))
+ print("@{} = internal addrspace(1) global i64 1, align 8".format(
+ sampler_name))
+ generated_metadata.append(
+ "!{{i64 addrspace(1)* @{}, !\"sampler\", i32 1}}".format(sampler_name))
+
+ return generated_metadata
+
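+# Print the collected annotations as the !nvvm.annotations metadata list.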
+def gen_metadata(metadata):
+ md_values = ["!{}".format(i) for i in range(len(metadata))]
+ print("!nvvm.annotations = !{{{values}}}".format(values=(", ".join(md_values))))
+ for i, md in enumerate(metadata):
+ print("!{} = {}".format(i, md))
+
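+# LLVM IR arguments for the surface coordinates of the given geometry.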
+def get_llvm_surface_access(geom_ptx):
+ access = {
+ "1d" : "i32 %x",
+ "2d" : "i32 %x, i32 %y",
+ "3d" : "i32 %x, i32 %y, i32 %z",
+ "a1d" : "i32 %l, i32 %x",
+ "a2d" : "i32 %l, i32 %x, i32 %y",
+ }
+ return access[geom_ptx]
+
+def get_llvm_surface_access_type(geom_ptx):
+ access_ty = {
+ "1d" : "i32",
+ "2d" : "i32, i32",
+ "3d" : "i32, i32, i32",
+ "a1d" : "i32, i32",
+ "a2d" : "i32, i32, i32",
+ }
+ return access_ty[geom_ptx]
+
+def get_ptx_surface_access(geom_ptx):
+ """
+ Operand b is a scalar or singleton tuple for 1d surfaces; is a
+ two-element vector for 2d surfaces; and is a four-element vector
+ for 3d surfaces, where the fourth element is ignored. Coordinate
+ elements are of type .s32.
+
+ For 1d surface arrays, operand b has type .v2.b32. The first
+ element is interpreted as an unsigned integer index (.u32) into
+ the surface array, and the second element is interpreted as a 1d
+ surface coordinate of type .s32.
+
+ For 2d surface arrays, operand b has type .v4.b32. The first
+ element is interpreted as an unsigned integer index (.u32) into
+ the surface array, and the next two elements are interpreted as 2d
+ surface coordinates of type .s32. The fourth element is ignored.
+ """
+ access_reg = {
+ "1d" : "{%r{{[0-9]}}}",
+ "2d" : "{%r{{[0-9]}}, %r{{[0-9]}}}",
+ "3d" : "{%r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}}",
+ "a1d" : "{%r{{[0-9]}}, %r{{[0-9]}}}",
+ "a2d" : "{%r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}}",
+ }
+ return access_reg[geom_ptx]
+
+def get_ptx_surface(target):
+ # With the 'cuda' environment the surface handle is copied with
+ # ld.param, so the instruction uses a register. For 'nvcl' the
+ # instruction uses the parameter directly.
+ if target == "cuda":
+ return "%rd{{[0-9]+}}"
+ elif target == "nvcl":
+ return "test_{{.*}}_param_0"
+ raise RuntimeError("invalid target: " + target)
+
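+# Build the "kernel" annotation for a generated surface test function,
+# plus the "rdwrimage" annotation where applicable.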
+def get_surface_metadata(target, fun_ty, fun_name, has_surface_param):
+ metadata = []
+
+ md_kernel = "!{{{fun_ty} @{fun_name}, !\"kernel\", i32 1}}".format(
+ fun_ty=fun_ty, fun_name=fun_name)
+ metadata.append(md_kernel)
+
+ if target == "cuda":
+ # When a parameter is lowered as a .surfref, it still has the
+ # corresponding ld.param.u64, which is illegal. Do not emit the
+ # metadata to keep the parameter as .b64 instead.
+ has_surface_param = False
+
+ if has_surface_param:
+ md_surface = "!{{{fun_ty} @{fun_name}, !\"rdwrimage\", i32 0}}".format(
+ fun_ty=fun_ty, fun_name=fun_name)
+ metadata.append(md_surface)
+
+ return metadata
+
+def gen_suld_tests(target, global_surf):
+ """
+ PTX spec s9.7.10.1. Surface Instructions:
+
+ suld.b.geom{.cop}.vec.dtype.clamp d, [a, b]; // unformatted
+
+ .geom = { .1d, .2d, .3d, .a1d, .a2d };
+ .cop = { .ca, .cg, .cs, .cv }; // cache operation
+ .vec = { none, .v2, .v4 };
+ .dtype = { .b8 , .b16, .b32, .b64 };
+ .clamp = { .trap, .clamp, .zero };
+ """
+
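+ # For example, with geom=1d, vec="", dtype=b32, clamp=trap on the cuda
+ # target the template below expands (roughly) to:
+ #
+ #   declare i32 @llvm.nvvm.suld.1d.i32.trap(i64 %s, i32 %x);
+ #   ; CHECK: suld.b.1d.b32.trap {%r{{[0-9]+}}}, [%rd{{[0-9]+}}, {%r{{[0-9]}}}]
+ #   define void @test_suld_1db32trap_param(i64 %s, i32* %ret, i32 %x) { ... }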
+ template = """
+ declare ${retty} @${intrinsic}(i64 %s, ${access});
+
+ ; CHECK-LABEL: .entry ${test_name}_param
+ ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
+ ;
+ define void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
+ %val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
+ store ${retty} %val, ${retty}* %ret
+ ret void
+ }
+ ; CHECK-LABEL: .entry ${test_name}_global
+ ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
+ ;
+ define void @${test_name}_global(${retty}* %ret, ${access}) {
+ %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
+ %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
+ store ${retty} %val, ${retty}* %ret
+ ret void
+ }
+ """
+
+ generated_items = []
+ generated_metadata = []
+ # FIXME: "cop" is missing
+ for geom, vec, dtype, clamp in product(
+ ["1d", "2d", "3d", "a1d", "a2d"],
+ ["", "v2", "v4"],
+ ["b8" , "b16", "b32", "b64"],
+ ["trap", "clamp", "zero"]):
+
+ if vec == "v4" and dtype == "b64":
+ continue
+
+ test_name = "test_suld_" + geom + vec + dtype + clamp
+
+ params = {
+ "test_name" : test_name,
+
+ "intrinsic" : "llvm.nvvm.suld.{geom}.{dtype}.{clamp}".format(
+ geom=get_llvm_geom(geom),
+ dtype=(vec + get_llvm_type(dtype)),
+ clamp=clamp),
+ "retty" : get_llvm_vec_type(vec, dtype),
+ "access" : get_llvm_surface_access(geom),
+ "global_surf" : global_surf,
+
+ "instruction" : "suld.b.{geom}{vec}.{dtype}.{clamp}".format(
+ geom=geom,
+ vec=("" if vec == "" else "." + vec),
+ dtype=dtype,
+ clamp=clamp),
+ "reg_ret" : get_ptx_vec_reg(vec, dtype),
+ "reg_surf" : get_ptx_surface(target),
+ "reg_access" : get_ptx_surface_access(geom),
+ }
+ gen_test(template, params)
+ generated_items.append((params["intrinsic"], params["instruction"]))
+
+ fun_name = test_name + "_param"
+ fun_ty = "void (i64, {retty}*, {access_ty})*".format(
+ retty=params["retty"],
+ access_ty=get_llvm_surface_access_type(geom))
+ generated_metadata += get_surface_metadata(
+ target, fun_ty, fun_name, has_surface_param=True)
+
+ fun_name = test_name + "_global"
+ fun_ty = "void ({retty}*, {access_ty})*".format(
+ retty=params["retty"],
+ access_ty=get_llvm_surface_access_type(geom))
+ generated_metadata += get_surface_metadata(
+ target, fun_ty, fun_name, has_surface_param=False)
+
+ return generated_items, generated_metadata
+
+def gen_sust_tests(target, global_surf):
+ """
+ PTX spec s9.7.10.2. Surface Instructions
+
+ sust.b.{1d,2d,3d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted
+ sust.p.{1d,2d,3d}.vec.b32.clamp [a, b], c; // formatted
+
+ sust.b.{a1d,a2d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted
+
+ .cop = { .wb, .cg, .cs, .wt }; // cache operation
+ .vec = { none, .v2, .v4 };
+ .ctype = { .b8 , .b16, .b32, .b64 };
+ .clamp = { .trap, .clamp, .zero };
+ """
+
+ template = """
+ declare void @${intrinsic}(i64 %s, ${access}, ${value});
+
+ ; CHECK-LABEL: .entry ${test_name}_param
+ ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
+ ;
+ define void @${test_name}_param(i64 %s, ${value}, ${access}) {
+ tail call void @${intrinsic}(i64 %s, ${access}, ${value})
+ ret void
+ }
+ ; CHECK-LABEL: .entry ${test_name}_global
+ ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
+ ;
+ define void @${test_name}_global(${value}, ${access}) {
+ %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
+ tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
+ ret void
+ }
+ """
+
+ generated_items = []
+ generated_metadata = []
+ # FIXME: "cop" is missing
+ for fmt, geom, vec, ctype, clamp in product(
+ ["b", "p"],
+ ["1d", "2d", "3d", "a1d", "a2d"],
+ ["", "v2", "v4"],
+ ["b8" , "b16", "b32", "b64"],
+ ["trap", "clamp", "zero"]):
+
+ if fmt == "p" and geom[0] == "a":
+ continue
+ if fmt == "p" and ctype != "b32":
+ continue
+ if vec == "v4" and ctype == "b64":
+ continue
+
+ # FIXME: these intrinsics are missing, but at least one of them is
+ # listed in the PTX spec: sust.p.{1d,2d,3d}.vec.b32.clamp
+ if fmt == "p" and clamp != "trap":
+ continue
+
+ test_name = "test_sust_" + fmt + geom + vec + ctype + clamp
+
+ params = {
+ "test_name" : test_name,
+
+ "intrinsic" : "llvm.nvvm.sust.{fmt}.{geom}.{ctype}.{clamp}".format(
+ fmt=fmt,
+ geom=get_llvm_geom(geom),
+ ctype=(vec + get_llvm_type(ctype)),
+ clamp=clamp),
+ "access" : get_llvm_surface_access(geom),
+ "value" : get_llvm_value(vec, ctype),
+ "global_surf" : global_surf,
+
+ "instruction" : "sust.{fmt}.{geom}{vec}.{ctype}.{clamp}".format(
+ fmt=fmt,
+ geom=geom,
+ vec=("" if vec == "" else "." + vec),
+ ctype=ctype,
+ clamp=clamp),
+ "reg_value" : get_ptx_vec_reg(vec, ctype),
+ "reg_surf" : get_ptx_surface(target),
+ "reg_access" : get_ptx_surface_access(geom)
+ }
+ gen_test(template, params)
+ generated_items.append((params["intrinsic"], params["instruction"]))
+
+ fun_name = test_name + "_param"
+ fun_ty = "void (i64, {value_ty}, {access_ty})*".format(
+ value_ty=get_llvm_value_type(vec, ctype),
+ access_ty=get_llvm_surface_access_type(geom))
+ generated_metadata += get_surface_metadata(
+ target, fun_ty, fun_name, has_surface_param=True)
+
+ fun_name = test_name + "_global"
+ fun_ty = "void ({value_ty}, {access_ty})*".format(
+ value_ty=get_llvm_value_type(vec, ctype),
+ access_ty=get_llvm_surface_access_type(geom))
+ generated_metadata += get_surface_metadata(
+ target, fun_ty, fun_name, has_surface_param=False)
+
+ return generated_items, generated_metadata
+
+def is_unified(target):
+ """
+ PTX has two modes of operation. In the unified mode, texture and
+ sampler information is accessed through a single .texref handle. In
+ the independent mode, texture and sampler information each have their
+ own handle, allowing them to be defined separately and combined at the
+ site of usage in the program.
+
+ """
+ return target == "cuda"
+
+def get_llvm_texture_access(geom_ptx, ctype, mipmap):
+ geom_access = {
+ "1d" : "{ctype} %x",
+ "2d" : "{ctype} %x, {ctype} %y",
+ "3d" : "{ctype} %x, {ctype} %y, {ctype} %z",
+ "cube" : "{ctype} %s, {ctype} %t, {ctype} %r",
+ "a1d" : "i32 %l, {ctype} %x",
+ "a2d" : "i32 %l, {ctype} %x, {ctype} %y",
+ "acube" : "i32 %l, {ctype} %s, {ctype} %t, {ctype} %r",
+ }
+
+ access = geom_access[geom_ptx]
+
+ if mipmap == "level":
+ access += ", {ctype} %lvl"
+ elif mipmap == "grad":
+ if geom_ptx in ("1d", "a1d"):
+ access += ", {ctype} %dpdx1, {ctype} %dpdy1"
+ elif geom_ptx in ("2d", "a2d"):
+ access += (", {ctype} %dpdx1, {ctype} %dpdx2" +
+ ", {ctype} %dpdy1, {ctype} %dpdy2")
+ else:
+ access += (", {ctype} %dpdx1, {ctype} %dpdx2, {ctype} %dpdx3" +
+ ", {ctype} %dpdy1, {ctype} %dpdy2, {ctype} %dpdy3")
+
+ return access.format(ctype=get_llvm_type(ctype))
+
+def get_llvm_texture_access_type(geom_ptx, ctype, mipmap):
+ geom_access = {
+ "1d" : "{ctype}",
+ "2d" : "{ctype}, {ctype}",
+ "3d" : "{ctype}, {ctype}, {ctype}",
+ "cube" : "{ctype}, {ctype}, {ctype}",
+ "a1d" : "i32, {ctype}",
+ "a2d" : "i32, {ctype}, {ctype}",
+ "acube" : "i32, {ctype}, {ctype}, {ctype}",
+ }
+
+ access = geom_access[geom_ptx]
+
+ if mipmap == "level":
+ access += ", {ctype}"
+ elif mipmap == "grad":
+ if geom_ptx in ("1d", "a1d"):
+ access += ", {ctype}, {ctype}"
+ elif geom_ptx in ("2d", "a2d"):
+ access += (", {ctype}, {ctype}, {ctype}, {ctype}")
+ else:
+ access += (", {ctype}, {ctype}, {ctype}" +
+ ", {ctype}, {ctype}, {ctype}")
+
+ return access.format(ctype=get_llvm_type(ctype))
+
+def get_ptx_texture_access(geom_ptx, ctype):
+ access_reg = {
+ "1d" : "{{{ctype_reg}}}",
+ "2d" : "{{{ctype_reg}, {ctype_reg}}}",
+ "3d" : "{{{ctype_reg}, {ctype_reg}, {ctype_reg}, {ctype_reg}}}",
+ "a1d" : "{{{b32_reg}, {ctype_reg}}}",
+ "a2d" : "{{{b32_reg}, {ctype_reg}, {ctype_reg}, {ctype_reg}}}",
+ "cube" : "{{{f32_reg}, {f32_reg}, {f32_reg}, {f32_reg}}}",
+ "acube" : "{{{b32_reg}, {f32_reg}, {f32_reg}, {f32_reg}}}",
+ }
+ return access_reg[geom_ptx].format(ctype_reg=get_ptx_reg(ctype),
+ b32_reg=get_ptx_reg("b32"),
+ f32_reg=get_ptx_reg("f32"))
+
+def get_ptx_texture(target):
+ # With the 'cuda' environment the texture and sampler handles are
+ # copied with ld.param, so the instruction uses registers. For 'nvcl'
+ # the instruction uses the texture/sampler parameters directly.
+ if target == "cuda":
+ return "%rd{{[0-9]+}}"
+ elif target == "nvcl":
+ return "test_{{.*}}_param_0, test_{{.*}}_param_1"
+ raise RuntimeError("unknown target: " + target)
+
+def get_llvm_global_sampler(target, global_sampler):
+ if is_unified(target):
+ return "", ""
+ else:
+ sampler_handle = "i64 %gs,"
+ get_sampler_handle = (
+ "%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64" +
+ "(i64 addrspace(1)* @{})".format(global_sampler))
+ return sampler_handle, get_sampler_handle
+
+def get_ptx_global_sampler(target, global_sampler):
+ if is_unified(target):
+ return ""
+ else:
+ return global_sampler + ","
+
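+# Build the "kernel" annotation for a generated texture test function,
+# plus the "rdoimage" and "sampler" annotations where applicable.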
+def get_texture_metadata(target, fun_ty, fun_name, has_texture_params):
+ metadata = []
+
+ md_kernel = "!{{{fun_ty} @{fun_name}, !\"kernel\", i32 1}}".format(
+ fun_ty=fun_ty, fun_name=fun_name)
+ metadata.append(md_kernel)
+
+ if target == "cuda":
+ # When a parameter is lowered as a .texref, it still has the
+ # corresponding ld.param.u64, which is illegal. Do not emit the
+ # metadata to keep the parameter as .b64 instead.
+ has_texture_params = False
+
+ if has_texture_params:
+ md_texture = "!{{{fun_ty} @{fun_name}, !\"rdoimage\", i32 0}}".format(
+ fun_ty=fun_ty, fun_name=fun_name)
+ metadata.append(md_texture)
+
+ if not is_unified(target):
+ md_sampler = "!{{{fun_ty} @{fun_name}, !\"sampler\", i32 1}}".format(
+ fun_ty=fun_ty, fun_name=fun_name)
+ metadata.append(md_sampler)
+
+ return metadata
+
+def gen_tex_tests(target, global_tex, global_sampler):
+ """
+ PTX spec s9.7.9.3. Texture Instructions
+
+ tex.geom.v4.dtype.ctype d, [a, c] {, e} {, f};
+ tex.geom.v4.dtype.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler
+
+ tex.geom.v2.f16x2.ctype d[|p], [a, c] {, e} {, f};
+ tex.geom.v2.f16x2.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler
+
+ // mipmaps
+ tex.base.geom.v4.dtype.ctype d[|p], [a, {b,} c] {, e} {, f};
+ tex.level.geom.v4.dtype.ctype d[|p], [a, {b,} c], lod {, e} {, f};
+ tex.grad.geom.v4.dtype.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};
+
+ tex.base.geom.v2.f16x2.ctype d[|p], [a, {b,} c] {, e} {, f};
+ tex.level.geom.v2.f16x2.ctype d[|p], [a, {b,} c], lod {, e} {, f};
+ tex.grad.geom.v2.f16x2.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};
+
+ .geom = { .1d, .2d, .3d, .a1d, .a2d, .cube, .acube, .2dms, .a2dms };
+ .dtype = { .u32, .s32, .f16, .f32 };
+ .ctype = { .s32, .f32 }; // .cube, .acube require .f32
+ // .2dms, .a2dms require .s32
+ """
+
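+ # For example, with geom=2d, vec=v4, dtype=f32, ctype=f32 on the cuda
+ # (unified) target the template below expands (roughly) to:
+ #
+ #   declare { float, float, float, float } @llvm.nvvm.tex.unified.2d.v4f32.f32(i64 %tex, float %x, float %y)
+ #   ; CHECK: tex.2d.v4.f32.f32 {%f{{[0-9]+}}, ...}, [%rd{{[0-9]+}}, {%f{{[0-9]+}}, %f{{[0-9]+}}}]
+ #   define void @test_tex_2dv4f32f32_param(i64 %tex, { float, float, float, float }* %ret, float %x, float %y) { ... }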
+ template = """
+ declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
+
+ ; CHECK-LABEL: .entry ${test_name}_param
+ ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
+ define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
+ %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
+ store ${retty} %val, ${retty}* %ret
+ ret void
+ }
+ ; CHECK-LABEL: .entry ${test_name}_global
+ ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+ define void @${test_name}_global(${retty}* %ret, ${access}) {
+ %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
+ ${get_sampler_handle}
+ %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
+ store ${retty} %val, ${retty}* %ret
+ ret void
+ }
+ """
+
+ generated_items = []
+ generated_metadata = []
+ for mipmap, geom, vec, dtype, ctype in product(
+ ["", "level", "grad"],
+ ["1d", "2d", "3d", "a1d", "a2d", "cube", "acube", "2dms", "a2dms"],
+ ["v2", "v4"],
+ ["u32", "s32", "f16", "f32"],
+ ["s32", "f32"]):
+
+ # FIXME: missing intrinsics.
+ # Multi-sample textures and multi-sample texture arrays
+ # introduced in PTX ISA version 3.2.
+ if geom in ("2dms", "a2dms"):
+ continue
+
+ # FIXME: missing intrinsics? no such restriction in the PTX spec
+ if ctype == "s32" and mipmap != "":
+ continue
+
+ # FIXME: missing intrinsics?
+ if ctype == "s32" and geom in ("cube", "acube"):
+ continue
+
+ # FIXME: missing intrinsics.
+ # Support for textures returning f16 and f16x2 data introduced in
+ # PTX ISA version 4.2.
+ if vec == "v2" or dtype == "f16":
+ continue
+
+ # FIXME: missing intrinsics.
+ # Support for tex.grad.{cube, acube} introduced in PTX ISA version
+ # 4.3.
+ if mipmap == "grad" and geom in ("cube", "acube"):
+ continue
+
+ # The instruction returns a two-element vector for destination
+ # type f16x2. For all other destination types, the instruction
+ # returns a four-element vector. Coordinates may be given in
+ # either signed 32-bit integer or 32-bit floating point form.
+ if vec == "v2" and dtype != "f16":
+ continue
+
+ sampler_handle, get_sampler_handle = get_llvm_global_sampler(
+ target, global_sampler)
+
+ test_name = "test_tex_" + "".join((mipmap, geom, vec, dtype, ctype))
+ params = {
+ "test_name" : test_name,
+ "intrinsic" :
+ "llvm.nvvm.tex{unified}.{geom}{mipmap}.{vec}{dtype}.{ctype}".format(
+ unified=(".unified" if is_unified(target) else ""),
+ geom=get_llvm_geom(geom),
+ mipmap=("" if mipmap == "" else "." + mipmap),
+ vec=vec,
+ dtype=dtype,
+ ctype=ctype),
+ "global_tex": global_tex,
+ "retty" : get_llvm_vec_type(vec, dtype),
+ "sampler" : sampler_handle,
+ "access" : get_llvm_texture_access(geom, ctype, mipmap),
+ "get_sampler_handle" : get_sampler_handle,
+
+ "instruction" : "tex{mipmap}.{geom}.{vec}.{dtype}.{ctype}".format(
+ mipmap=("" if mipmap == "" else "." + mipmap),
+ geom=geom,
+ vec=vec,
+ dtype=dtype,
+ ctype=ctype),
+ "ptx_ret" : get_ptx_vec_reg(vec, dtype),
+ "ptx_tex" : get_ptx_texture(target),
+ "ptx_access" : get_ptx_texture_access(geom, ctype),
+ "ptx_global_sampler" : get_ptx_global_sampler(target, global_sampler),
+ }
+ gen_test(template, params)
+ generated_items.append((params["intrinsic"], params["instruction"]))
+
+ fun_name = test_name + "_param"
+ fun_ty = "void (i64, {sampler} {retty}*, {access_ty})*".format(
+ sampler=("" if is_unified(target) else "i64,"),
+ retty=params["retty"],
+ access_ty=get_llvm_texture_access_type(geom, ctype, mipmap))
+ generated_metadata += get_texture_metadata(
+ target, fun_ty, fun_name, has_texture_params=True)
+
+ fun_name = test_name + "_global"
+ fun_ty = "void ({retty}*, {access_ty})*".format(
+ retty=params["retty"],
+ access_ty=get_llvm_texture_access_type(geom, ctype, mipmap))
+ generated_metadata += get_texture_metadata(
+ target, fun_ty, fun_name, has_texture_params=False)
+
+ return generated_items, generated_metadata
+
+def get_llvm_tld4_access(geom):
+ """
+ For 2D textures, operand c specifies coordinates as a two-element,
+ 32-bit floating-point vector.
+
+ For 2d texture arrays operand c is a four element, 32-bit
+ vector. The first element in operand c is interpreted as an unsigned
+ integer index (.u32) into the texture array, and the next two
+ elements are interpreted as 32-bit floating point coordinates of 2d
+ texture. The fourth element is ignored.
+
+ For cubemap textures, operand c specifies four-element vector which
+ comprises three floating-point coordinates (s, t, r) and a fourth
+ padding argument which is ignored.
+
+ [For cube arrays] The first element in operand c is interpreted as
+ an unsigned integer index (.u32) into the cubemap texture array, and
+ the remaining three elements are interpreted as floating-point
+ cubemap coordinates (s, t, r), used to lookup in the selected
+ cubemap.
+ """
+ geom_to_access = {
+ "2d" : "float %x, float %y",
+ "a2d" : "i32 %l, float %x, float %y",
+ "cube" : "float %s, float %t, float %r",
+ "acube" : "i32 %l, float %s, float %t, float %r"
+ }
+ return geom_to_access[geom]
+
+def get_llvm_tld4_access_type(geom):
+ geom_to_access = {
+ "2d" : "float, float",
+ "a2d" : "i32, float, float",
+ "cube" : "float, float, float",
+ "acube" : "i32, float, float, float"
+ }
+ return geom_to_access[geom]
+
+def get_ptx_tld4_access(geom):
+ geom_to_access = {
+ "2d" : "{%f{{[0-9]+}}, %f{{[0-9]+}}}",
+ "a2d" : "{%r{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
+ "cube" : "{%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
+ "acube" : "{%r{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}"
+ }
+ return geom_to_access[geom]
+
+def gen_tld4_tests(target, global_tex, global_sampler):
+ """
+ PTX spec s9.7.9.4. Texture Instructions: tld4
+ Perform a texture fetch of the 4-texel bilerp footprint.
+
+ tld4.comp.2d.v4.dtype.f32 d[|p], [a, c] {, e} {, f};
+ tld4.comp.geom.v4.dtype.f32 d[|p], [a, b, c] {, e} {, f}; // explicit sampler
+
+ .comp = { .r, .g, .b, .a };
+ .geom = { .2d, .a2d, .cube, .acube };
+ .dtype = { .u32, .s32, .f32 };
+ """
+
+ template = """
+ declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
+
+ ; CHECK-LABEL: .entry ${test_name}_param
+ ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
+ define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
+ %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
+ store ${retty} %val, ${retty}* %ret
+ ret void
+ }
+ ; CHECK-LABEL: .entry ${test_name}_global
+ ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+ define void @${test_name}_global(${retty}* %ret, ${access}) {
+ %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
+ ${get_sampler_handle}
+ %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
+ store ${retty} %val, ${retty}* %ret
+ ret void
+ }
+ """
+
+ generated_items = []
+ generated_metadata = []
+ for comp, geom, dtype in product(
+ ["r", "g", "b", "a"],
+ ["2d", "a2d", "cube", "acube"],
+ ["u32", "s32", "f32"]):
+
+ # FIXME: missing intrinsics.
+ # tld4.{a2d,cube,acube} introduced in PTX ISA version 4.3.
+ if geom in ("a2d", "cube", "acube"):
+ continue
+
+ sampler_handle, get_sampler_handle = get_llvm_global_sampler(
+ target, global_sampler)
+
+ test_name = "test_tld4_" + "".join((comp, geom, dtype))
+ params = {
+ "test_name" : test_name,
+ "intrinsic" :
+ "llvm.nvvm.tld4{unified}.{comp}.{geom}.v4{dtype}.f32".format(
+ unified=(".unified" if is_unified(target) else ""),
+ comp=comp,
+ geom=get_llvm_geom(geom),
+ dtype=dtype),
+ "global_tex" : global_tex,
+ "retty" : get_llvm_vec_type("v4", dtype),
+ "sampler" : sampler_handle,
+ "access" : get_llvm_tld4_access(geom),
+ "get_sampler_handle" : get_sampler_handle,
+
+ "instruction" : "tld4.{comp}.{geom}.v4.{dtype}.f32".format(
+ comp=comp, geom=geom, dtype=dtype),
+ "ptx_ret" : get_ptx_vec_reg("v4", dtype),
+ "ptx_tex" : get_ptx_texture(target),
+ "ptx_access" : get_ptx_tld4_access(geom),
+ "ptx_global_sampler" : get_ptx_global_sampler(target, global_sampler),
+ }
+ gen_test(template, params)
+ generated_items.append((params["intrinsic"], params["instruction"]))
+
+ fun_name = test_name + "_param"
+ fun_ty = "void (i64, {sampler} {retty}*, {access_ty})*".format(
+ sampler=("" if is_unified(target) else "i64,"),
+ retty=params["retty"],
+ access_ty=get_llvm_tld4_access_type(geom))
+ generated_metadata += get_texture_metadata(
+ target, fun_ty, fun_name, has_texture_params=True)
+
+ fun_name = test_name + "_global"
+ fun_ty = "void ({retty}*, {access_ty})*".format(
+ retty=params["retty"],
+ access_ty=get_llvm_tld4_access_type(geom))
+ generated_metadata += get_texture_metadata(
+ target, fun_ty, fun_name, has_texture_params=False)
+
+ return generated_items, generated_metadata
+
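+# Instantiate a test template with the given parameters and print it; in
+# debug mode the parameters themselves are printed first as comments.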
+def gen_test(template, params):
+ if debug:
+ print()
+ for param, value in params.items():
+ print(";; {}: {}".format(param, value))
+
+ print(string.Template(textwrap.dedent(template)).substitute(params))
+
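+# Generate the requested test groups for the target and return the
+# (intrinsic, instruction) pairs they cover.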
+def gen_tests(target, tests):
+ gen_triple(target)
+
+ items = []
+ metadata = []
+
+ global_surf = "gsurf"
+ global_tex = "gtex"
+ global_sampler = "gsam"
+ metadata += gen_globals(target, global_surf, global_tex, global_sampler)
+
+ if "suld" in tests:
+ suld_items, suld_md = gen_suld_tests(target, global_surf)
+ items += suld_items
+ metadata += suld_md
+ if "sust" in tests:
+ sust_items, sust_md = gen_sust_tests(target, global_surf)
+ items += sust_items
+ metadata += sust_md
+ if "tex" in tests:
+ tex_items, tex_md = gen_tex_tests(target, global_tex, global_sampler)
+ items += tex_items
+ metadata += tex_md
+ if "tld4" in tests:
+ tld4_items, tld4_md = gen_tld4_tests(target, global_tex, global_sampler)
+ items += tld4_items
+ metadata += tld4_md
+
+ gen_metadata(metadata)
+ return items
+
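+# Record the (intrinsic, instruction) pairs covered by the generated tests
+# so that a later --verify run can compare them against TableGen.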
+def write_gen_list(filename, append, items):
+ with open(filename, ("a" if append else "w")) as f:
+ for intrinsic, instruction in items:
+ f.write("{} {}\n".format(intrinsic, instruction))
+
+def read_gen_list(filename):
+ intrinsics = set()
+ instructions = set()
+ with open(filename) as f:
+ for line in f:
+ intrinsic, instruction = line.split()
+ intrinsics.add(intrinsic)
+ instructions.add(instruction)
+ return (intrinsics, instructions)
+
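+# Collect all names captured by the regex from a TableGen file.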
+def read_td_list(filename, regex):
+ td_list = set()
+ with open(filename) as f:
+ for line in f:
+ match = re.search(regex, line)
+ if match:
+ td_list.add(match.group(1))
+
+ # Arbitrary value - we should find quite a lot of instructions
+ if len(td_list) < 30:
+ raise RuntimeError("found only {} instructions in {}".format(
+ len(td_list), filename))
+
+ return td_list
+
+def verify_inst_tablegen(path_td, gen_instr):
+ """
+ Verify that all instructions defined in NVPTXIntrinsics.td are
+ tested.
+ """
+
+ td_instr = read_td_list(path_td, "\"((suld|sust|tex|tld4)\\..*)\"")
+
+ gen_instr.update({
+ # FIXME: spec does not list any sust.p variants other than b32
+ "sust.p.1d.b8.trap",
+ "sust.p.1d.b16.trap",
+ "sust.p.1d.v2.b8.trap",
+ "sust.p.1d.v2.b16.trap",
+ "sust.p.1d.v4.b8.trap",
+ "sust.p.1d.v4.b16.trap",
+ "sust.p.a1d.b8.trap",
+ "sust.p.a1d.b16.trap",
+ "sust.p.a1d.v2.b8.trap",
+ "sust.p.a1d.v2.b16.trap",
+ "sust.p.a1d.v4.b8.trap",
+ "sust.p.a1d.v4.b16.trap",
+ "sust.p.2d.b8.trap",
+ "sust.p.2d.b16.trap",
+ "sust.p.2d.v2.b8.trap",
+ "sust.p.2d.v2.b16.trap",
+ "sust.p.2d.v4.b8.trap",
+ "sust.p.2d.v4.b16.trap",
+ "sust.p.a2d.b8.trap",
+ "sust.p.a2d.b16.trap",
+ "sust.p.a2d.v2.b8.trap",
+ "sust.p.a2d.v2.b16.trap",
+ "sust.p.a2d.v4.b8.trap",
+ "sust.p.a2d.v4.b16.trap",
+ "sust.p.3d.b8.trap",
+ "sust.p.3d.b16.trap",
+ "sust.p.3d.v2.b8.trap",
+ "sust.p.3d.v2.b16.trap",
+ "sust.p.3d.v4.b8.trap",
+ "sust.p.3d.v4.b16.trap",
+
+ # FIXME: sust.p is also not supported for arrays
+ "sust.p.a1d.b32.trap",
+ "sust.p.a1d.v2.b32.trap",
+ "sust.p.a1d.v4.b32.trap",
+ "sust.p.a2d.b32.trap",
+ "sust.p.a2d.v2.b32.trap",
+ "sust.p.a2d.v4.b32.trap",
+ })
+
+ td_instr = list(td_instr)
+ td_instr.sort()
+ gen_instr = list(gen_instr)
+ gen_instr.sort()
+ for i, td in enumerate(td_instr):
+ if i == len(gen_instr) or td != gen_instr[i]:
+ raise RuntimeError(
+ "{} is present in tablegen, but not tested.\n".format(td))
+
+def verify_llvm_tablegen(path_td, gen_intr):
+ """
+ Verify that all intrinsics defined in IntrinsicsNVVM.td are
+ tested.
+ """
+
+ td_intr = read_td_list(
+ path_td, "\"(llvm\\.nvvm\\.(suld|sust|tex|tld4)\\..*)\"")
+
+ gen_intr.update({
+ # FIXME: spec does not list any sust.p variants other than b32
+ "llvm.nvvm.sust.p.1d.i8.trap",
+ "llvm.nvvm.sust.p.1d.i16.trap",
+ "llvm.nvvm.sust.p.1d.v2i8.trap",
+ "llvm.nvvm.sust.p.1d.v2i16.trap",
+ "llvm.nvvm.sust.p.1d.v4i8.trap",
+ "llvm.nvvm.sust.p.1d.v4i16.trap",
+ "llvm.nvvm.sust.p.1d.array.i8.trap",
+ "llvm.nvvm.sust.p.1d.array.i16.trap",
+ "llvm.nvvm.sust.p.1d.array.v2i8.trap",
+ "llvm.nvvm.sust.p.1d.array.v2i16.trap",
+ "llvm.nvvm.sust.p.1d.array.v4i8.trap",
+ "llvm.nvvm.sust.p.1d.array.v4i16.trap",
+ "llvm.nvvm.sust.p.2d.i8.trap",
+ "llvm.nvvm.sust.p.2d.i16.trap",
+ "llvm.nvvm.sust.p.2d.v2i8.trap",
+ "llvm.nvvm.sust.p.2d.v2i16.trap",
+ "llvm.nvvm.sust.p.2d.v4i8.trap",
+ "llvm.nvvm.sust.p.2d.v4i16.trap",
+ "llvm.nvvm.sust.p.2d.array.i8.trap",
+ "llvm.nvvm.sust.p.2d.array.i16.trap",
+ "llvm.nvvm.sust.p.2d.array.v2i8.trap",
+ "llvm.nvvm.sust.p.2d.array.v2i16.trap",
+ "llvm.nvvm.sust.p.2d.array.v4i8.trap",
+ "llvm.nvvm.sust.p.2d.array.v4i16.trap",
+ "llvm.nvvm.sust.p.3d.i8.trap",
+ "llvm.nvvm.sust.p.3d.i16.trap",
+ "llvm.nvvm.sust.p.3d.v2i8.trap",
+ "llvm.nvvm.sust.p.3d.v2i16.trap",
+ "llvm.nvvm.sust.p.3d.v4i8.trap",
+ "llvm.nvvm.sust.p.3d.v4i16.trap",
+
+ # FIXME: sust.p is also not supported for arrays
+ "llvm.nvvm.sust.p.1d.array.i32.trap",
+ "llvm.nvvm.sust.p.1d.array.v2i32.trap",
+ "llvm.nvvm.sust.p.1d.array.v4i32.trap",
+ "llvm.nvvm.sust.p.2d.array.i32.trap",
+ "llvm.nvvm.sust.p.2d.array.v2i32.trap",
+ "llvm.nvvm.sust.p.2d.array.v4i32.trap"
+ })
+
+ td_intr = list(td_intr)
+ td_intr.sort()
+ gen_intr = list(gen_intr)
+ gen_intr.sort()
+ for i, td in enumerate(td_intr):
+ if i == len(gen_intr) or td != gen_intr[i]:
+ raise RuntimeError(
+ "{} is present in tablegen, but not tested.\n".format(td))
+
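+# Command-line entry point: by default generate tests to stdout; with
+# --verify, compare a previously generated list against the TableGen files.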
+parser = argparse.ArgumentParser()
+parser.add_argument("--debug", action="store_true")
+parser.add_argument("--tests", type=str)
+parser.add_argument("--target", type=str)
+parser.add_argument("--gen-list", dest="gen_list", type=str)
+parser.add_argument("--gen-list-append", dest="gen_list_append",
+ action="store_true")
+parser.add_argument("--verify", action="store_true")
+parser.add_argument("--llvm-tablegen", dest="llvm_td", type=str)
+parser.add_argument("--inst-tablegen", dest="inst_td", type=str)
+
+args = parser.parse_args()
+debug = args.debug
+
+if args.verify:
+ intrinsics, instructions = read_gen_list(args.gen_list)
+ verify_inst_tablegen(args.inst_td, instructions)
+ verify_llvm_tablegen(args.llvm_td, intrinsics)
+else:
+ items = gen_tests(args.target, args.tests.split(","))
+ if args.gen_list:
+ write_gen_list(args.gen_list, args.gen_list_append, items)