[clang] [Headers] Create stub spirv64intrin.h (PR #131164)
Jon Chesterfield via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 13 09:00:02 PDT 2025
https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/131164
The structure follows amdgpuintrin.h, but with declarations where compiler intrinsics are not yet available.
Address space numbers, the kernel attribute, and checking how this interacts with OpenMP are left for later patches.
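
For orientation, a minimal sketch (not part of the patch) of how a translation unit could pick up these declarations through the umbrella header once the stub lands; the file name and kernel body are illustrative, and the compile line only mirrors the lit test added below.

// example.c -- illustrative sketch only; the __gpu_* names come from the
// header added in this patch, the kernel body is invented for demonstration.
//
// Built roughly like the lit test below:
//   clang -cc1 -internal-isystem <clang>/lib/Headers -triple spirv64-- \
//     -emit-llvm example.c -o -
#include <stdint.h>
#include <gpuintrin.h> // dispatches to <spirv64intrin.h> when __SPIRV64__ is set

// __gpu_kernel and __gpu_global are empty macros in the stub for now.
__gpu_kernel void write_lane_ids(__gpu_global uint32_t *out) {
  // These are plain declarations in the stub; lowering to SPIR-V builtins is
  // left for later patches, so linking needs a runtime that provides them.
  uint32_t gid =
      __gpu_block_id_x() * __gpu_num_threads_x() + __gpu_thread_id_x();
  out[gid] = __gpu_lane_id();
}
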
From 0b8f555fd2561c7aac92935784caaaa365328ea9 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield at gmail.com>
Date: Thu, 13 Mar 2025 15:44:52 +0000
Subject: [PATCH] [Headers] Create stub spirv64intrin.h
---
clang/lib/Headers/amdgpuintrin.h | 2 +-
clang/lib/Headers/gpuintrin.h | 2 +
clang/lib/Headers/spirv64intrin.h | 131 ++++++++++++++++++
clang/test/Headers/gpuintrin.c | 223 ++++++++++++++++++++++++++++++
4 files changed, 357 insertions(+), 1 deletion(-)
create mode 100644 clang/lib/Headers/spirv64intrin.h
diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index f7fb8e2814180..817cfeec896c4 100644
--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -1,4 +1,4 @@
-//===-- amdgpuintrin.h - AMDPGU intrinsic functions -----------------------===//
+//===-- amdgpuintrin.h - AMDGPU intrinsic functions -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 0fb3916acac61..cf1cfd41a6788 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -60,6 +60,8 @@ _Pragma("omp end declare target");
#include <nvptxintrin.h>
#elif defined(__AMDGPU__)
#include <amdgpuintrin.h>
+#elif defined(__SPIRV64__)
+#include <spirv64intrin.h>
#elif !defined(_OPENMP)
#error "This header is only meant to be used on GPU architectures."
#endif
diff --git a/clang/lib/Headers/spirv64intrin.h b/clang/lib/Headers/spirv64intrin.h
new file mode 100644
index 0000000000000..2b9157544f170
--- /dev/null
+++ b/clang/lib/Headers/spirv64intrin.h
@@ -0,0 +1,131 @@
+//===-- spirv64intrin.h - SPIRV64 intrinsic functions --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __SPIRV64INTRIN_H
+#define __SPIRV64INTRIN_H
+
+#ifndef __SPIRV64__
+#error "This file is intended for SPIRV64 targets or offloading to SPIRV64"
+#endif
+
+#ifndef __GPUINTRIN_H
+#error "Never use <spirv64intrin.h> directly; include <gpuintrin.h> instead"
+#endif
+
+// This is the skeleton of the spirv64 implementation of gpuintrin.h.
+// Address spaces and the kernel attribute are not yet implemented.
+// The target-specific functions are declarations awaiting clang support.
+
+#if defined(_OPENMP)
+#error "Openmp is not yet available on spirv64 though gpuintrin header"
+#endif
+
+// Type aliases to the address spaces used by the SPIRV backend.
+#define __gpu_private
+#define __gpu_constant
+#define __gpu_local
+#define __gpu_global
+#define __gpu_generic
+
+// Attribute to declare a function as a kernel.
+#define __gpu_kernel
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_x(void);
+
+// Returns the number of workgroups in the 'y' dimension of the grid.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_y(void);
+
+// Returns the number of workgroups in the 'z' dimension of the grid.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_z(void);
+
+// Returns the 'x' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_x(void);
+
+// Returns the 'y' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_y(void);
+
+// Returns the 'z' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_z(void);
+
+// Returns the number of workitems in the 'x' dimension.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_x(void);
+
+// Returns the number of workitems in the 'y' dimension.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_y(void);
+
+// Returns the number of workitems in the 'z' dimension.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_z(void);
+
+// Returns the 'x' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_x(void);
+
+// Returns the 'y' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_y(void);
+
+// Returns the 'z' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_z(void);
+
+// Returns the size of the wave.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_lanes(void);
+
+// Returns the id of the thread inside of a wave executing together.
+_DEFAULT_FN_ATTRS uint32_t __gpu_lane_id(void);
+
+// Returns the bit-mask of active threads in the current wave.
+_DEFAULT_FN_ATTRS uint64_t __gpu_lane_mask(void);
+
+// Copies the value from the first active thread in the wave to the rest.
+_DEFAULT_FN_ATTRS uint32_t __gpu_read_first_lane_u32(uint64_t __lane_mask,
+ uint32_t __x);
+
+// Returns a bitmask of threads in the current lane for which \p x is true.
+_DEFAULT_FN_ATTRS uint64_t __gpu_ballot(uint64_t __lane_mask, bool __x);
+
+// Waits for all the threads in the block to converge and issues a fence.
+_DEFAULT_FN_ATTRS void __gpu_sync_threads(void);
+
+// Waits for all threads in the wave to converge.
+_DEFAULT_FN_ATTRS void __gpu_sync_lane(uint64_t __lane_mask);
+
+// Shuffles the lanes inside the wave according to the given index.
+_DEFAULT_FN_ATTRS uint32_t __gpu_shuffle_idx_u32(uint64_t __lane_mask,
+ uint32_t __idx, uint32_t __x,
+ uint32_t __width);
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __gpu_match_any_u32_impl(__lane_mask, __x);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
+ return __gpu_match_any_u64_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __gpu_match_all_u32_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
+ return __gpu_match_all_u64_impl(__lane_mask, __x);
+}
+
+// Terminates execution of the associated wave.
+_DEFAULT_FN_ATTRS [[noreturn]] void __gpu_exit(void);
+
+// Suspend the thread briefly to assist the scheduler during busy loops.
+_DEFAULT_FN_ATTRS void __gpu_thread_suspend(void);
+
+#endif // __SPIRV64INTRIN_H
diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index 9a15ce277ba87..eaf001be19ac9 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -9,6 +9,11 @@
// RUN: -target-feature +ptx62 \
// RUN: -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefix=NVPTX
+//
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
+// RUN: -internal-isystem %S/../../lib/Headers/ \
+// RUN: -triple spirv64-- -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefix=SPIRV64
#include <gpuintrin.h>
@@ -978,6 +983,224 @@ __gpu_kernel void foo() {
// NVPTX-NEXT: call void @llvm.nvvm.exit()
// NVPTX-NEXT: ret void
//
+//
+// SPIRV64-LABEL: define spir_func void @foo(
+// SPIRV64-SAME: ) #[[ATTR0:[0-9]+]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x()
+// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i32 @__gpu_num_blocks_y()
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_z()
+// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_num_blocks(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_x()
+// SPIRV64-NEXT: [[CALL5:%.*]] = call spir_func i32 @__gpu_block_id_y()
+// SPIRV64-NEXT: [[CALL6:%.*]] = call spir_func i32 @__gpu_block_id_z()
+// SPIRV64-NEXT: [[CALL7:%.*]] = call spir_func i32 @__gpu_block_id(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL8:%.*]] = call spir_func i32 @__gpu_num_threads_x()
+// SPIRV64-NEXT: [[CALL9:%.*]] = call spir_func i32 @__gpu_num_threads_y()
+// SPIRV64-NEXT: [[CALL10:%.*]] = call spir_func i32 @__gpu_num_threads_z()
+// SPIRV64-NEXT: [[CALL11:%.*]] = call spir_func i32 @__gpu_num_threads(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL12:%.*]] = call spir_func i32 @__gpu_thread_id_x()
+// SPIRV64-NEXT: [[CALL13:%.*]] = call spir_func i32 @__gpu_thread_id_y()
+// SPIRV64-NEXT: [[CALL14:%.*]] = call spir_func i32 @__gpu_thread_id_z()
+// SPIRV64-NEXT: [[CALL15:%.*]] = call spir_func i32 @__gpu_thread_id(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL16:%.*]] = call spir_func i32 @__gpu_num_lanes()
+// SPIRV64-NEXT: [[CALL17:%.*]] = call spir_func i32 @__gpu_lane_id()
+// SPIRV64-NEXT: [[CALL18:%.*]] = call spir_func i64 @__gpu_lane_mask()
+// SPIRV64-NEXT: [[CALL19:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1)
+// SPIRV64-NEXT: [[CALL20:%.*]] = call spir_func i64 @__gpu_read_first_lane_u64(i64 noundef -1, i64 noundef -1)
+// SPIRV64-NEXT: [[CALL21:%.*]] = call spir_func i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true)
+// SPIRV64-NEXT: call spir_func void @__gpu_sync_threads()
+// SPIRV64-NEXT: call spir_func void @__gpu_sync_lane(i64 noundef -1)
+// SPIRV64-NEXT: [[CALL22:%.*]] = call spir_func i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0)
+// SPIRV64-NEXT: [[CALL23:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef -1)
+// SPIRV64-NEXT: [[CALL24:%.*]] = call spir_func zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1)
+// SPIRV64-NEXT: call spir_func void @__gpu_exit() #[[ATTR4:[0-9]+]]
+// SPIRV64-NEXT: unreachable
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_blocks(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_blocks_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_block_id(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_block_id_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_block_id_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_threads(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_threads_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_threads_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_threads_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_thread_id(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_thread_id_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_thread_id_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_thread_id_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i64 @__gpu_read_first_lane_u64(
+// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]], i64 noundef [[__X:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: [[__X_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: [[__HI:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__LO:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: store i64 [[__X]], ptr [[__X_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__X_ADDR]], align 8
+// SPIRV64-NEXT: [[SHR:%.*]] = lshr i64 [[TMP0]], 32
+// SPIRV64-NEXT: [[CONV:%.*]] = trunc i64 [[SHR]] to i32
+// SPIRV64-NEXT: store i32 [[CONV]], ptr [[__HI]], align 4
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR]], align 8
+// SPIRV64-NEXT: [[AND:%.*]] = and i64 [[TMP1]], 4294967295
+// SPIRV64-NEXT: [[CONV1:%.*]] = trunc i64 [[AND]] to i32
+// SPIRV64-NEXT: store i32 [[CONV1]], ptr [[__LO]], align 4
+// SPIRV64-NEXT: [[TMP2:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP3:%.*]] = load i32, ptr [[__HI]], align 4
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP2]], i32 noundef [[TMP3]])
+// SPIRV64-NEXT: [[CONV2:%.*]] = zext i32 [[CALL]] to i64
+// SPIRV64-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 32
+// SPIRV64-NEXT: [[TMP4:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP5:%.*]] = load i32, ptr [[__LO]], align 4
+// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP4]], i32 noundef [[TMP5]])
+// SPIRV64-NEXT: [[CONV4:%.*]] = zext i32 [[CALL3]] to i64
+// SPIRV64-NEXT: [[AND5:%.*]] = and i64 [[CONV4]], 4294967295
+// SPIRV64-NEXT: [[OR:%.*]] = or i64 [[SHL]], [[AND5]]
+// SPIRV64-NEXT: ret i64 [[OR]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i64 @__gpu_first_lane_id(
+// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP0]], i1 true)
+// SPIRV64-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1
+// SPIRV64-NEXT: [[ISZERO:%.*]] = icmp eq i64 [[TMP0]], 0
+// SPIRV64-NEXT: [[FFS:%.*]] = select i1 [[ISZERO]], i64 0, i64 [[TMP2]]
+// SPIRV64-NEXT: [[CAST:%.*]] = trunc i64 [[FFS]] to i32
+// SPIRV64-NEXT: [[SUB:%.*]] = sub nsw i32 [[CAST]], 1
+// SPIRV64-NEXT: [[CONV:%.*]] = sext i32 [[SUB]] to i64
+// SPIRV64-NEXT: ret i64 [[CONV]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func zeroext i1 @__gpu_is_first_in_lane(
+// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_lane_id()
+// SPIRV64-NEXT: [[CONV:%.*]] = zext i32 [[CALL]] to i64
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef [[TMP0]])
+// SPIRV64-NEXT: [[CMP:%.*]] = icmp eq i64 [[CONV]], [[CALL1]]
+// SPIRV64-NEXT: ret i1 [[CMP]]
+//
//.
// AMDGPU: [[RNG3]] = !{i32 1, i32 0}
// AMDGPU: [[META4]] = !{}