[clang] cuda clang: Move nvptx-surface.cu test to CodeGenCUDA (PR #134758)
Austin Schuh via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 8 14:54:58 PDT 2025
https://github.com/AustinSchuh updated https://github.com/llvm/llvm-project/pull/134758
>From 1e6367407a4b23b2a85f088ba4b66a0c0afc8faa Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Mon, 7 Apr 2025 17:18:38 -0700
Subject: [PATCH 1/2] cuda clang: Move nvptx-surface.cu test to CodeGenCUDA
Signed-off-by: Austin Schuh <austin.linux at gmail.com>
---
clang/test/CodeGen/Inputs/cuda.h | 194 ------------------
.../{CodeGen => CodeGenCUDA}/nvptx-surface.cu | 164 +++++++++++++++
2 files changed, 164 insertions(+), 194 deletions(-)
delete mode 100644 clang/test/CodeGen/Inputs/cuda.h
rename clang/test/{CodeGen => CodeGenCUDA}/nvptx-surface.cu (98%)
diff --git a/clang/test/CodeGen/Inputs/cuda.h b/clang/test/CodeGen/Inputs/cuda.h
deleted file mode 100644
index 58202442e1f8c..0000000000000
--- a/clang/test/CodeGen/Inputs/cuda.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/* Minimal declarations for CUDA support. Testing purposes only.
- * This should stay in sync with clang/test/Headers/Inputs/include/cuda.h
- */
-#pragma once
-
-// Make this file work with nvcc, for testing compatibility.
-
-#ifndef __NVCC__
-#define __constant__ __attribute__((constant))
-#define __device__ __attribute__((device))
-#define __global__ __attribute__((global))
-#define __host__ __attribute__((host))
-#define __shared__ __attribute__((shared))
-#define __managed__ __attribute__((managed))
-#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
-
-struct dim3 {
- unsigned x, y, z;
- __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
-};
-
-// Host- and device-side placement new overloads.
-void *operator new(__SIZE_TYPE__, void *p) { return p; }
-void *operator new[](__SIZE_TYPE__, void *p) { return p; }
-__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; }
-__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }
-
-#define CUDA_VERSION 10100
-
-struct char1 {
- char x;
- __host__ __device__ char1(char x = 0) : x(x) {}
-};
-struct char2 {
- char x, y;
- __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
-};
-struct char4 {
- char x, y, z, w;
- __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uchar1 {
- unsigned char x;
- __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
-};
-struct uchar2 {
- unsigned char x, y;
- __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
-};
-struct uchar4 {
- unsigned char x, y, z, w;
- __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct short1 {
- short x;
- __host__ __device__ short1(short x = 0) : x(x) {}
-};
-struct short2 {
- short x, y;
- __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
-};
-struct short4 {
- short x, y, z, w;
- __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ushort1 {
- unsigned short x;
- __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
-};
-struct ushort2 {
- unsigned short x, y;
- __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
-};
-struct ushort4 {
- unsigned short x, y, z, w;
- __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct int1 {
- int x;
- __host__ __device__ int1(int x = 0) : x(x) {}
-};
-struct int2 {
- int x, y;
- __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
-};
-struct int4 {
- int x, y, z, w;
- __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uint1 {
- unsigned x;
- __host__ __device__ uint1(unsigned x = 0) : x(x) {}
-};
-struct uint2 {
- unsigned x, y;
- __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
-};
-struct uint3 {
- unsigned x, y, z;
- __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
-};
-struct uint4 {
- unsigned x, y, z, w;
- __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct longlong1 {
- long long x;
- __host__ __device__ longlong1(long long x = 0) : x(x) {}
-};
-struct longlong2 {
- long long x, y;
- __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
-};
-struct longlong4 {
- long long x, y, z, w;
- __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ulonglong1 {
- unsigned long long x;
- __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
-};
-struct ulonglong2 {
- unsigned long long x, y;
- __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
-};
-struct ulonglong4 {
- unsigned long long x, y, z, w;
- __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct float1 {
- float x;
- __host__ __device__ float1(float x = 0) : x(x) {}
-};
-struct float2 {
- float x, y;
- __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
-};
-struct float4 {
- float x, y, z, w;
- __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct double1 {
- double x;
- __host__ __device__ double1(double x = 0) : x(x) {}
-};
-struct double2 {
- double x, y;
- __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
-};
-struct double4 {
- double x, y, z, w;
- __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-typedef unsigned long long cudaTextureObject_t;
-typedef unsigned long long cudaSurfaceObject_t;
-
-enum cudaTextureReadMode {
- cudaReadModeNormalizedFloat,
- cudaReadModeElementType
-};
-
-enum cudaSurfaceBoundaryMode {
- cudaBoundaryModeZero,
- cudaBoundaryModeClamp,
- cudaBoundaryModeTrap
-};
-
-enum {
- cudaTextureType1D,
- cudaTextureType2D,
- cudaTextureType3D,
- cudaTextureTypeCubemap,
- cudaTextureType1DLayered,
- cudaTextureType2DLayered,
- cudaTextureTypeCubemapLayered
-};
-
-struct textureReference {};
-template <class T, int texType = cudaTextureType1D,
- enum cudaTextureReadMode mode = cudaReadModeElementType>
-struct __attribute__((device_builtin_texture_type)) texture
- : public textureReference {};
-
-#endif // !__NVCC__
diff --git a/clang/test/CodeGen/nvptx-surface.cu b/clang/test/CodeGenCUDA/nvptx-surface.cu
similarity index 98%
rename from clang/test/CodeGen/nvptx-surface.cu
rename to clang/test/CodeGenCUDA/nvptx-surface.cu
index 56995f2c0da80..f7822740a571c 100644
--- a/clang/test/CodeGen/nvptx-surface.cu
+++ b/clang/test/CodeGenCUDA/nvptx-surface.cu
@@ -2,6 +2,170 @@
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s
#include "Inputs/cuda.h"
+struct char1 {
+ char x;
+ __host__ __device__ char1(char x = 0) : x(x) {}
+};
+struct char2 {
+ char x, y;
+ __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
+};
+struct char4 {
+ char x, y, z, w;
+ __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uchar1 {
+ unsigned char x;
+ __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
+};
+struct uchar2 {
+ unsigned char x, y;
+ __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
+};
+struct uchar4 {
+ unsigned char x, y, z, w;
+ __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct short1 {
+ short x;
+ __host__ __device__ short1(short x = 0) : x(x) {}
+};
+struct short2 {
+ short x, y;
+ __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
+};
+struct short4 {
+ short x, y, z, w;
+ __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ushort1 {
+ unsigned short x;
+ __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
+};
+struct ushort2 {
+ unsigned short x, y;
+ __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
+};
+struct ushort4 {
+ unsigned short x, y, z, w;
+ __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct int1 {
+ int x;
+ __host__ __device__ int1(int x = 0) : x(x) {}
+};
+struct int2 {
+ int x, y;
+ __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
+};
+struct int4 {
+ int x, y, z, w;
+ __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uint1 {
+ unsigned x;
+ __host__ __device__ uint1(unsigned x = 0) : x(x) {}
+};
+struct uint2 {
+ unsigned x, y;
+ __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
+};
+struct uint3 {
+ unsigned x, y, z;
+ __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
+};
+struct uint4 {
+ unsigned x, y, z, w;
+ __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct longlong1 {
+ long long x;
+ __host__ __device__ longlong1(long long x = 0) : x(x) {}
+};
+struct longlong2 {
+ long long x, y;
+ __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
+};
+struct longlong4 {
+ long long x, y, z, w;
+ __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ulonglong1 {
+ unsigned long long x;
+ __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
+};
+struct ulonglong2 {
+ unsigned long long x, y;
+ __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
+};
+struct ulonglong4 {
+ unsigned long long x, y, z, w;
+ __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct float1 {
+ float x;
+ __host__ __device__ float1(float x = 0) : x(x) {}
+};
+struct float2 {
+ float x, y;
+ __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
+};
+struct float4 {
+ float x, y, z, w;
+ __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct double1 {
+ double x;
+ __host__ __device__ double1(double x = 0) : x(x) {}
+};
+struct double2 {
+ double x, y;
+ __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
+};
+struct double4 {
+ double x, y, z, w;
+ __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+typedef unsigned long long cudaTextureObject_t;
+typedef unsigned long long cudaSurfaceObject_t;
+
+enum cudaTextureReadMode {
+ cudaReadModeNormalizedFloat,
+ cudaReadModeElementType
+};
+
+enum cudaSurfaceBoundaryMode {
+ cudaBoundaryModeZero,
+ cudaBoundaryModeClamp,
+ cudaBoundaryModeTrap
+};
+
+enum {
+ cudaTextureType1D,
+ cudaTextureType2D,
+ cudaTextureType3D,
+ cudaTextureTypeCubemap,
+ cudaTextureType1DLayered,
+ cudaTextureType2DLayered,
+ cudaTextureTypeCubemapLayered
+};
+
+struct textureReference {};
+template <class T, int texType = cudaTextureType1D,
+ enum cudaTextureReadMode mode = cudaReadModeElementType>
+struct __attribute__((device_builtin_texture_type)) texture
+ : public textureReference {};
+
#include "__clang_cuda_texture_intrinsics.h"
__device__ void surfchar(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
>From 522cd517577a46e1d491cb7400169e34b92e4a00 Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Tue, 8 Apr 2025 14:46:17 -0700
Subject: [PATCH 2/2] Move code to cuda.h, and fix issues that causes
---
clang/test/CodeGenCUDA/Inputs/cuda.h | 169 ++++++++++++++++++
.../test/CodeGenCUDA/correctly-rounded-div.cu | 4 +-
clang/test/CodeGenCUDA/nvptx-surface.cu | 164 -----------------
clang/test/CodeGenCUDA/offloading-entries.cu | 22 +--
.../test/CodeGenCUDA/propagate-attributes.cu | 5 +-
5 files changed, 180 insertions(+), 184 deletions(-)
diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index a8d85afb7cd21..dc85eae0c5178 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -72,3 +72,172 @@ extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
#endif
extern "C" __device__ int printf(const char*, ...);
+
+struct char1 {
+ char x;
+ __host__ __device__ char1(char x = 0) : x(x) {}
+};
+struct char2 {
+ char x, y;
+ __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
+};
+struct char4 {
+ char x, y, z, w;
+ __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uchar1 {
+ unsigned char x;
+ __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
+};
+struct uchar2 {
+ unsigned char x, y;
+ __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
+};
+struct uchar4 {
+ unsigned char x, y, z, w;
+ __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct short1 {
+ short x;
+ __host__ __device__ short1(short x = 0) : x(x) {}
+};
+struct short2 {
+ short x, y;
+ __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
+};
+struct short4 {
+ short x, y, z, w;
+ __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ushort1 {
+ unsigned short x;
+ __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
+};
+struct ushort2 {
+ unsigned short x, y;
+ __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
+};
+struct ushort4 {
+ unsigned short x, y, z, w;
+ __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct int1 {
+ int x;
+ __host__ __device__ int1(int x = 0) : x(x) {}
+};
+struct int2 {
+ int x, y;
+ __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
+};
+struct int4 {
+ int x, y, z, w;
+ __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uint1 {
+ unsigned x;
+ __host__ __device__ uint1(unsigned x = 0) : x(x) {}
+};
+struct uint2 {
+ unsigned x, y;
+ __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
+};
+struct uint3 {
+ unsigned x, y, z;
+ __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
+};
+struct uint4 {
+ unsigned x, y, z, w;
+ __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct longlong1 {
+ long long x;
+ __host__ __device__ longlong1(long long x = 0) : x(x) {}
+};
+struct longlong2 {
+ long long x, y;
+ __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
+};
+struct longlong4 {
+ long long x, y, z, w;
+ __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ulonglong1 {
+ unsigned long long x;
+ __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
+};
+struct ulonglong2 {
+ unsigned long long x, y;
+ __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
+};
+struct ulonglong4 {
+ unsigned long long x, y, z, w;
+ __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct float1 {
+ float x;
+ __host__ __device__ float1(float x = 0) : x(x) {}
+};
+struct float2 {
+ float x, y;
+ __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
+};
+struct float4 {
+ float x, y, z, w;
+ __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct double1 {
+ double x;
+ __host__ __device__ double1(double x = 0) : x(x) {}
+};
+struct double2 {
+ double x, y;
+ __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
+};
+struct double4 {
+ double x, y, z, w;
+ __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+typedef unsigned long long cudaTextureObject_t;
+typedef unsigned long long cudaSurfaceObject_t;
+
+enum cudaTextureReadMode {
+ cudaReadModeNormalizedFloat,
+ cudaReadModeElementType
+};
+
+enum cudaSurfaceBoundaryMode {
+ cudaBoundaryModeZero,
+ cudaBoundaryModeClamp,
+ cudaBoundaryModeTrap
+};
+
+enum {
+ cudaTextureType1D,
+ cudaTextureType2D,
+ cudaTextureType3D,
+ cudaTextureTypeCubemap,
+ cudaTextureType1DLayered,
+ cudaTextureType2DLayered,
+ cudaTextureTypeCubemapLayered
+};
+
+struct textureReference { };
+template <class T, int texType = cudaTextureType1D,
+ enum cudaTextureReadMode mode = cudaReadModeElementType>
+struct __attribute__((device_builtin_texture_type)) texture
+ : public textureReference {};
+
+struct surfaceReference { int desc; };
+
+template <typename T, int dim = 1>
+struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
diff --git a/clang/test/CodeGenCUDA/correctly-rounded-div.cu b/clang/test/CodeGenCUDA/correctly-rounded-div.cu
index abc130e8a4ee4..9451922060597 100644
--- a/clang/test/CodeGenCUDA/correctly-rounded-div.cu
+++ b/clang/test/CodeGenCUDA/correctly-rounded-div.cu
@@ -8,7 +8,7 @@
#include "Inputs/cuda.h"
-typedef __attribute__(( ext_vector_type(4) )) float float4;
+typedef __attribute__(( ext_vector_type(4) )) float floatvec4;
// COMMON-LABEL: @_Z11spscalardiv
// COMMON: fdiv{{.*}},
@@ -22,7 +22,7 @@ __device__ float spscalardiv(float a, float b) {
// COMMON: fdiv{{.*}},
// NCRDIV: !fpmath ![[MD]]
// CRDIV-NOT: !fpmath
-__device__ float4 spvectordiv(float4 a, float4 b) {
+__device__ floatvec4 spvectordiv(floatvec4 a, floatvec4 b) {
return a / b;
}
diff --git a/clang/test/CodeGenCUDA/nvptx-surface.cu b/clang/test/CodeGenCUDA/nvptx-surface.cu
index f7822740a571c..56995f2c0da80 100644
--- a/clang/test/CodeGenCUDA/nvptx-surface.cu
+++ b/clang/test/CodeGenCUDA/nvptx-surface.cu
@@ -2,170 +2,6 @@
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s
#include "Inputs/cuda.h"
-struct char1 {
- char x;
- __host__ __device__ char1(char x = 0) : x(x) {}
-};
-struct char2 {
- char x, y;
- __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
-};
-struct char4 {
- char x, y, z, w;
- __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uchar1 {
- unsigned char x;
- __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
-};
-struct uchar2 {
- unsigned char x, y;
- __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
-};
-struct uchar4 {
- unsigned char x, y, z, w;
- __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct short1 {
- short x;
- __host__ __device__ short1(short x = 0) : x(x) {}
-};
-struct short2 {
- short x, y;
- __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
-};
-struct short4 {
- short x, y, z, w;
- __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ushort1 {
- unsigned short x;
- __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
-};
-struct ushort2 {
- unsigned short x, y;
- __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
-};
-struct ushort4 {
- unsigned short x, y, z, w;
- __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct int1 {
- int x;
- __host__ __device__ int1(int x = 0) : x(x) {}
-};
-struct int2 {
- int x, y;
- __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
-};
-struct int4 {
- int x, y, z, w;
- __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uint1 {
- unsigned x;
- __host__ __device__ uint1(unsigned x = 0) : x(x) {}
-};
-struct uint2 {
- unsigned x, y;
- __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
-};
-struct uint3 {
- unsigned x, y, z;
- __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
-};
-struct uint4 {
- unsigned x, y, z, w;
- __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct longlong1 {
- long long x;
- __host__ __device__ longlong1(long long x = 0) : x(x) {}
-};
-struct longlong2 {
- long long x, y;
- __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
-};
-struct longlong4 {
- long long x, y, z, w;
- __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ulonglong1 {
- unsigned long long x;
- __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
-};
-struct ulonglong2 {
- unsigned long long x, y;
- __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
-};
-struct ulonglong4 {
- unsigned long long x, y, z, w;
- __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct float1 {
- float x;
- __host__ __device__ float1(float x = 0) : x(x) {}
-};
-struct float2 {
- float x, y;
- __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
-};
-struct float4 {
- float x, y, z, w;
- __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct double1 {
- double x;
- __host__ __device__ double1(double x = 0) : x(x) {}
-};
-struct double2 {
- double x, y;
- __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
-};
-struct double4 {
- double x, y, z, w;
- __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-typedef unsigned long long cudaTextureObject_t;
-typedef unsigned long long cudaSurfaceObject_t;
-
-enum cudaTextureReadMode {
- cudaReadModeNormalizedFloat,
- cudaReadModeElementType
-};
-
-enum cudaSurfaceBoundaryMode {
- cudaBoundaryModeZero,
- cudaBoundaryModeClamp,
- cudaBoundaryModeTrap
-};
-
-enum {
- cudaTextureType1D,
- cudaTextureType2D,
- cudaTextureType3D,
- cudaTextureTypeCubemap,
- cudaTextureType1DLayered,
- cudaTextureType2DLayered,
- cudaTextureTypeCubemapLayered
-};
-
-struct textureReference {};
-template <class T, int texType = cudaTextureType1D,
- enum cudaTextureReadMode mode = cudaReadModeElementType>
-struct __attribute__((device_builtin_texture_type)) texture
- : public textureReference {};
-
#include "__clang_cuda_texture_intrinsics.h"
__device__ void surfchar(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu
index c053cf586f8f5..c22378105f71d 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -29,7 +29,7 @@
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
-// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
+// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries"
//.
// HIP: @managed.managed = global i32 0, align 4
// HIP: @managed = externally_initialized global ptr null
@@ -44,7 +44,7 @@
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
-// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
+// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries"
//.
// CUDA-COFF: @managed = dso_local global i32 undef, align 4
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
@@ -58,7 +58,7 @@
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
-// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
+// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE"
//.
// HIP-COFF: @managed.managed = dso_local global i32 0, align 4
// HIP-COFF: @managed = dso_local externally_initialized global ptr null
@@ -73,7 +73,7 @@
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
-// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
+// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE"
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
@@ -139,18 +139,6 @@ __device__ __managed__ int managed = 0;
//
__global__ void kernel() { external = 1; }
-struct surfaceReference { int desc; };
-
-template <typename T, int dim = 1>
-struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
-
surface<void> surf;
-struct textureReference {
- int desc;
-};
-
-template <typename T, int dim = 1, int mode = 0>
-struct __attribute__((device_builtin_texture_type)) texture : public textureReference {};
-
-texture<void> tex;
+texture<void, cudaTextureType2D> tex;
diff --git a/clang/test/CodeGenCUDA/propagate-attributes.cu b/clang/test/CodeGenCUDA/propagate-attributes.cu
index 5aee677800f2e..6dfd44487d1dc 100644
--- a/clang/test/CodeGenCUDA/propagate-attributes.cu
+++ b/clang/test/CodeGenCUDA/propagate-attributes.cu
@@ -25,6 +25,10 @@
// RUN: -fcuda-is-device -funsafe-math-optimizations -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+#ifndef LIB
+#include "Inputs/cuda.h"
+#endif
+
// Wrap everything in extern "C" so we don't have to worry about name mangling
// in the IR.
extern "C" {
@@ -36,7 +40,6 @@ void lib_fn() {}
#else
-#include "Inputs/cuda.h"
__device__ void lib_fn();
__global__ void kernel() { lib_fn(); }
More information about the cfe-commits
mailing list