[PATCH] D129536: [CUDA][FIX] Make shfl[_sync] for unsigned long long non-recursive
Johannes Doerfert via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 20 15:37:24 PDT 2022
jdoerfert updated this revision to Diff 446285.
jdoerfert added a comment.
Address comments
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D129536/new/
https://reviews.llvm.org/D129536
Files:
clang/lib/Headers/__clang_cuda_intrinsics.h
clang/test/CodeGenCUDA/shuffle_long_long.cu
Index: clang/test/CodeGenCUDA/shuffle_long_long.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/shuffle_long_long.cu
@@ -0,0 +1,61 @@
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-cpu sm_30 %s -o - | FileCheck %s --check-prefix=NO_SYNC
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-cpu sm_30 -target-feature +ptx70 -DSYNC -DCUDA_VERSION=9000 %s -o - | FileCheck %s --check-prefix=SYNC
+
+#include "Inputs/cuda.h"
+
+__device__ void *memcpy(void *dest, const void *src, size_t n);
+
+#define warpSize 32
+#include "__clang_cuda_intrinsics.h"
+
+__device__ void use(unsigned long long, long long);
+
+// Test function, 4 shfl calls.
+// NO_SYNC: define{{.*}} @_Z14test_long_longv
+// NO_SYNC: call noundef i64 @_Z6__shflyii(
+// NO_SYNC: call noundef i64 @_Z6__shflxii(
+
+// SYNC: define{{.*}} @_Z14test_long_longv
+// SYNC: call noundef i64 @_Z11__shfl_syncjyii(
+// SYNC: call noundef i64 @_Z11__shfl_syncjxii(
+
+// unsigned long long -> long long
+// NO_SYNC: define{{.*}} @_Z6__shflyii
+// NO_SYNC: call noundef i64 @_Z6__shflxii(
+
+// long long -> int + int
+// NO_SYNC: define{{.*}} @_Z6__shflxii
+// NO_SYNC: call noundef i32 @_Z6__shfliii(
+// NO_SYNC: call noundef i32 @_Z6__shfliii(
+
+// NO_SYNC: define{{.*}} @_Z6__shfliii
+// NO_SYNC: call i32 @llvm.nvvm.shfl.idx.i32
+
+// unsigned long long -> long long
+// SYNC: _Z11__shfl_syncjyii
+// SYNC: call noundef i64 @_Z11__shfl_syncjxii(
+
+// long long -> int + int
+// SYNC: define{{.*}} @_Z11__shfl_syncjxii
+// SYNC: call noundef i32 @_Z11__shfl_syncjiii(
+// SYNC: call noundef i32 @_Z11__shfl_syncjiii(
+
+// SYNC: define{{.*}} @_Z11__shfl_syncjiii
+// SYNC: call i32 @llvm.nvvm.shfl.sync.idx.i32
+
+__device__ void test_long_long() {
+ unsigned long long ull = 13;
+ long long ll = 17;
+#ifndef SYNC
+ ull = __shfl(ull, 7, 32);
+ ll = __shfl(ll, 7, 32);
+ use(ull, ll);
+#else
+ ull = __shfl_sync(0x11, ull, 7, 32);
+ ll = __shfl_sync(0x11, ll, 7, 32);
+ use(ull, ll);
+#endif
+}
+
Index: clang/lib/Headers/__clang_cuda_intrinsics.h
===================================================================
--- clang/lib/Headers/__clang_cuda_intrinsics.h
+++ clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -71,8 +71,8 @@
} \
inline __device__ unsigned long long __FnName( \
unsigned long long __val, __Type __offset, int __width = warpSize) { \
- return static_cast<unsigned long long>(::__FnName( \
- static_cast<unsigned long long>(__val), __offset, __width)); \
+ return static_cast<unsigned long long>( \
+ ::__FnName(static_cast<long long>(__val), __offset, __width)); \
} \
inline __device__ double __FnName(double __val, __Type __offset, \
int __width = warpSize) { \
@@ -139,8 +139,8 @@
inline __device__ unsigned long long __FnName( \
unsigned int __mask, unsigned long long __val, __Type __offset, \
int __width = warpSize) { \
- return static_cast<unsigned long long>(::__FnName( \
- __mask, static_cast<unsigned long long>(__val), __offset, __width)); \
+ return static_cast<unsigned long long>( \
+ ::__FnName(__mask, static_cast<long long>(__val), __offset, __width)); \
} \
inline __device__ long __FnName(unsigned int __mask, long __val, \
__Type __offset, int __width = warpSize) { \
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D129536.446285.patch
Type: text/x-patch
Size: 4084 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220720/32b0bfc2/attachment.bin>
More information about the cfe-commits
mailing list