[clang] [clang][HIP] Make some math not not work with AMDGCN SPIR-V (PR #128360)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Feb 22 08:24:33 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Alex Voicu (AlexVlx)
<details>
<summary>Changes</summary>
Do not hardcode `address_space(5)` (`private`) in the ROCDL interface, as that breaks SPIRV generation (the latter uses 0). Add test. In the long run we should stop using ROCDL inline.
---
Patch is 183.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128360.diff
3 Files Affected:
- (modified) clang/lib/Headers/__clang_hip_libdevice_declares.h (+16-20)
- (modified) clang/lib/Headers/__clang_hip_math.h (+16-16)
- (modified) clang/test/Headers/__clang_hip_math.hip (+1655)
``````````diff
diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h
index f15198b3d9f93..57600993fccff 100644
--- a/clang/lib/Headers/__clang_hip_libdevice_declares.h
+++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h
@@ -14,6 +14,12 @@
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
+#ifdef __SPIRV__
+#define __PRIVATE_AS __attribute__((address_space(0)))
+#else
+#define __PRIVATE_AS __attribute__((address_space(5)))
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -55,8 +61,7 @@ __device__ __attribute__((const)) float __ocml_fmax_f32(float, float);
__device__ __attribute__((const)) float __ocml_fmin_f32(float, float);
__device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float,
float);
-__device__ float __ocml_frexp_f32(float,
- __attribute__((address_space(5))) int *);
+__device__ float __ocml_frexp_f32(float, __PRIVATE_AS int *);
__device__ __attribute__((const)) float __ocml_hypot_f32(float, float);
__device__ __attribute__((const)) int __ocml_ilogb_f32(float);
__device__ __attribute__((const)) int __ocml_isfinite_f32(float);
@@ -74,8 +79,7 @@ __device__ __attribute__((pure)) float __ocml_native_log2_f32(float);
__device__ __attribute__((const)) float __ocml_logb_f32(float);
__device__ __attribute__((pure)) float __ocml_log_f32(float);
__device__ __attribute__((pure)) float __ocml_native_log_f32(float);
-__device__ float __ocml_modf_f32(float,
- __attribute__((address_space(5))) float *);
+__device__ float __ocml_modf_f32(float, __PRIVATE_AS float *);
__device__ __attribute__((const)) float __ocml_nearbyint_f32(float);
__device__ __attribute__((const)) float __ocml_nextafter_f32(float, float);
__device__ __attribute__((const)) float __ocml_len3_f32(float, float, float);
@@ -87,8 +91,7 @@ __device__ __attribute__((pure)) float __ocml_pow_f32(float, float);
__device__ __attribute__((pure)) float __ocml_pown_f32(float, int);
__device__ __attribute__((pure)) float __ocml_rcbrt_f32(float);
__device__ __attribute__((const)) float __ocml_remainder_f32(float, float);
-__device__ float __ocml_remquo_f32(float, float,
- __attribute__((address_space(5))) int *);
+__device__ float __ocml_remquo_f32(float, float, __PRIVATE_AS int *);
__device__ __attribute__((const)) float __ocml_rhypot_f32(float, float);
__device__ __attribute__((const)) float __ocml_rint_f32(float);
__device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float);
@@ -99,10 +102,8 @@ __device__ __attribute__((pure)) float __ocml_rsqrt_f32(float);
__device__ __attribute__((const)) float __ocml_scalb_f32(float, float);
__device__ __attribute__((const)) float __ocml_scalbn_f32(float, int);
__device__ __attribute__((const)) int __ocml_signbit_f32(float);
-__device__ float __ocml_sincos_f32(float,
- __attribute__((address_space(5))) float *);
-__device__ float __ocml_sincospi_f32(float,
- __attribute__((address_space(5))) float *);
+__device__ float __ocml_sincos_f32(float, __PRIVATE_AS float *);
+__device__ float __ocml_sincospi_f32(float, __PRIVATE_AS float *);
__device__ float __ocml_sin_f32(float);
__device__ float __ocml_native_sin_f32(float);
__device__ __attribute__((pure)) float __ocml_sinh_f32(float);
@@ -176,8 +177,7 @@ __device__ __attribute__((const)) double __ocml_fma_f64(double, double, double);
__device__ __attribute__((const)) double __ocml_fmax_f64(double, double);
__device__ __attribute__((const)) double __ocml_fmin_f64(double, double);
__device__ __attribute__((const)) double __ocml_fmod_f64(double, double);
-__device__ double __ocml_frexp_f64(double,
- __attribute__((address_space(5))) int *);
+__device__ double __ocml_frexp_f64(double, __PRIVATE_AS int *);
__device__ __attribute__((const)) double __ocml_hypot_f64(double, double);
__device__ __attribute__((const)) int __ocml_ilogb_f64(double);
__device__ __attribute__((const)) int __ocml_isfinite_f64(double);
@@ -192,8 +192,7 @@ __device__ __attribute__((pure)) double __ocml_log1p_f64(double);
__device__ __attribute__((pure)) double __ocml_log2_f64(double);
__device__ __attribute__((const)) double __ocml_logb_f64(double);
__device__ __attribute__((pure)) double __ocml_log_f64(double);
-__device__ double __ocml_modf_f64(double,
- __attribute__((address_space(5))) double *);
+__device__ double __ocml_modf_f64(double, __PRIVATE_AS double *);
__device__ __attribute__((const)) double __ocml_nearbyint_f64(double);
__device__ __attribute__((const)) double __ocml_nextafter_f64(double, double);
__device__ __attribute__((const)) double __ocml_len3_f64(double, double,
@@ -206,8 +205,7 @@ __device__ __attribute__((pure)) double __ocml_pow_f64(double, double);
__device__ __attribute__((pure)) double __ocml_pown_f64(double, int);
__device__ __attribute__((pure)) double __ocml_rcbrt_f64(double);
__device__ __attribute__((const)) double __ocml_remainder_f64(double, double);
-__device__ double __ocml_remquo_f64(double, double,
- __attribute__((address_space(5))) int *);
+__device__ double __ocml_remquo_f64(double, double, __PRIVATE_AS int *);
__device__ __attribute__((const)) double __ocml_rhypot_f64(double, double);
__device__ __attribute__((const)) double __ocml_rint_f64(double);
__device__ __attribute__((const)) double __ocml_rlen3_f64(double, double,
@@ -219,10 +217,8 @@ __device__ __attribute__((pure)) double __ocml_rsqrt_f64(double);
__device__ __attribute__((const)) double __ocml_scalb_f64(double, double);
__device__ __attribute__((const)) double __ocml_scalbn_f64(double, int);
__device__ __attribute__((const)) int __ocml_signbit_f64(double);
-__device__ double __ocml_sincos_f64(double,
- __attribute__((address_space(5))) double *);
-__device__ double
-__ocml_sincospi_f64(double, __attribute__((address_space(5))) double *);
+__device__ double __ocml_sincos_f64(double, __PRIVATE_AS double *);
+__device__ double __ocml_sincospi_f64(double, __PRIVATE_AS double *);
__device__ double __ocml_sin_f64(double);
__device__ __attribute__((pure)) double __ocml_sinh_f64(double);
__device__ double __ocml_sinpi_f64(double);
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 8468751d9de26..6c7dced1b35bd 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -33,6 +33,13 @@
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif
+#pragma push_macro("__PRIVATE_AS")
+
+#ifdef __SPIRV__
+#define __PRIVATE_AS __attribute__((address_space(0)))
+#else
+#define __PRIVATE_AS __attribute__((address_space(5)))
+#endif
// Device library provides fast low precision and slow full-recision
// implementations for some functions. Which one gets selected depends on
// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
@@ -512,8 +519,7 @@ float modff(float __x, float *__iptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
- float __r =
- __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
+ float __r = __ocml_modf_f32(__x, (__PRIVATE_AS float *)&__tmp);
*__iptr = __tmp;
return __r;
}
@@ -595,8 +601,7 @@ float remquof(float __x, float __y, int *__quo) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
- float __r = __ocml_remquo_f32(
- __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
+ float __r = __ocml_remquo_f32(__x, __y, (__PRIVATE_AS int *)&__tmp);
*__quo = __tmp;
return __r;
@@ -657,8 +662,7 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
__sincosf(__x, __sinptr, __cosptr);
#else
- *__sinptr =
- __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
+ *__sinptr = __ocml_sincos_f32(__x, (__PRIVATE_AS float *)&__tmp);
*__cosptr = __tmp;
#endif
}
@@ -669,8 +673,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
- *__sinptr = __ocml_sincospi_f32(
- __x, (__attribute__((address_space(5))) float *)&__tmp);
+ *__sinptr = __ocml_sincospi_f32(__x, (__PRIVATE_AS float *)&__tmp);
*__cosptr = __tmp;
}
@@ -913,8 +916,7 @@ double modf(double __x, double *__iptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
- double __r =
- __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
+ double __r = __ocml_modf_f64(__x, (__PRIVATE_AS double *)&__tmp);
*__iptr = __tmp;
return __r;
@@ -1004,8 +1006,7 @@ double remquo(double __x, double __y, int *__quo) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
- double __r = __ocml_remquo_f64(
- __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
+ double __r = __ocml_remquo_f64(__x, __y, (__PRIVATE_AS int *)&__tmp);
*__quo = __tmp;
return __r;
@@ -1065,8 +1066,7 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
- *__sinptr = __ocml_sincos_f64(
- __x, (__attribute__((address_space(5))) double *)&__tmp);
+ *__sinptr = __ocml_sincos_f64(__x, (__PRIVATE_AS double *)&__tmp);
*__cosptr = __tmp;
}
@@ -1076,8 +1076,7 @@ void sincospi(double __x, double *__sinptr, double *__cosptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
- *__sinptr = __ocml_sincospi_f64(
- __x, (__attribute__((address_space(5))) double *)&__tmp);
+ *__sinptr = __ocml_sincospi_f64(__x, (__PRIVATE_AS double *)&__tmp);
*__cosptr = __tmp;
}
@@ -1322,6 +1321,7 @@ __host__ inline static int max(int __arg1, int __arg2) {
#endif
#pragma pop_macro("__DEVICE__")
+#pragma pop_macro("__PRIVATE_AS")
#pragma pop_macro("__RETURN_TYPE")
#pragma pop_macro("__FAST_OR_SLOW")
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index e4254d1e64bec..a375ea47b530d 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -26,6 +26,14 @@
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -fgpu-approx-transcendentals -o - \
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,APPROX %s
+// Check that we use the AMDGCNSPIRV address space map
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN: -internal-isystem %S/Inputs/include \
+// RUN: -triple spirv64-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN: -emit-llvm %s -fcuda-is-device -O1 -o - \
+// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=AMDGCNSPIRV %s
+
#define BOOL_TYPE int
typedef unsigned long long uint64_t;
@@ -57,6 +65,30 @@ typedef unsigned long long uint64_t;
// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_2_I]]
//
+// AMDGCNSPIRV-LABEL: @test___make_mantissa_base8(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]]
+// AMDGCNSPIRV: while.cond.i:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I]]
+// AMDGCNSPIRV: while.body.i:
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], -8
+// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp eq i8 [[TMP1]], 48
+// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl i64 [[__R_0_I]], 3
+// AMDGCNSPIRV-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
+// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]]
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]]
+// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]]
+// AMDGCNSPIRV: _ZL21__make_mantissa_base8PKc.exit:
+// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]]
+//
extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) {
return __make_mantissa_base8(p);
}
@@ -89,6 +121,30 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) {
// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_2_I]]
//
+// AMDGCNSPIRV-LABEL: @test___make_mantissa_base10(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]]
+// AMDGCNSPIRV: while.cond.i:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I]]
+// AMDGCNSPIRV: while.body.i:
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48
+// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10
+// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = mul i64 [[__R_0_I]], 10
+// AMDGCNSPIRV-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
+// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]]
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]]
+// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]]
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
+// AMDGCNSPIRV: _ZL22__make_mantissa_base10PKc.exit:
+// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]]
+//
extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) {
return __make_mantissa_base10(p);
}
@@ -131,6 +187,44 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) {
// CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_2_I]]
//
+// AMDGCNSPIRV-LABEL: @test___make_mantissa_base16(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]]
+// AMDGCNSPIRV: while.cond.i:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[CLEANUP_I:%.*]] ]
+// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_2_I:%.*]], [[CLEANUP_I]] ]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
+// AMDGCNSPIRV: while.body.i:
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48
+// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[IF_END31_I:%.*]], label [[IF_ELSE_I:%.*]]
+// AMDGCNSPIRV: if.else.i:
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = add i8 [[TMP0]], -97
+// AMDGCNSPIRV-NEXT: [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP2]], 6
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]]
+// AMDGCNSPIRV: if.else17.i:
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP0]], -65
+// AMDGCNSPIRV-NEXT: [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP3]], 6
+// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[CLEANUP_I]]
+// AMDGCNSPIRV: if.end31.i:
+// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ]
+// AMDGCNSPIRV-NEXT: [[MUL24_I:%.*]] = shl i64 [[__R_0_I]], 4
+// AMDGCNSPIRV-NEXT: [[CONV25_I:%.*]] = zext nneg i8 [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]]
+// AMDGCNSPIRV-NEXT: [[ADD28_I:%.*]] = add i64 [[ADD26_I]], [[CONV25_I]]
+// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 1
+// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I]]
+// AMDGCNSPIRV: cleanup.i:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I]], [[IF_END31_I]] ], [ [[__TAGP_ADDR_0_I]], [[IF_ELSE17_I]] ]
+// AMDGCNSPIRV-NEXT: [[__R_2_I]] = phi i64 [ [[ADD28_I]], [[IF_END31_I]] ], [ [[__R_0_I]], [[IF_ELSE17_I]] ]
+// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi i1 [ true, [[IF_END31_I]] ], [ false, [[IF_ELSE17_I]] ]
+// AMDGCNSPIRV-NEXT: br i1 [[COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], !llvm.loop [[LOOP12:![0-9]+]]
+// AMDGCNSPIRV: _ZL22__make_mantissa_base16PKc.exit:
+// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]]
+//
extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
return __make_mantissa_base16(p);
}
@@ -226,6 +320,89 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
// CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ 0, [[CLEANUP_I36_I]] ], [ [[__R_0_I32_I]], [[WHILE_COND_I30_I]] ], [ 0, [[CLEANUP_I20_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ]
// CHECK-NEXT: ret i64 [[RETVAL_0_I]]
//
+// AMDGCNSPIRV-LABEL: @test___make_mantissa(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P:%.*]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48
+// AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]]
+// AMDGCNSPIRV: if.then.i:
+// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[P]], i64 1
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [
+// AMDGCNSPIRV-NEXT: i8 120, label [[WHILE_COND_I28_I_PREHEADER:%.*]]
+// AMDGCNSPIRV-NEXT: i8 88, label [[WHILE_COND_I28_I_PREHEADER]]
+// AMDGCNSPIRV-NEXT: ]
+// AMDGCNSPIRV: while.cond.i28.i.preheader:
+// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I28_I:%.*]]
+// AMDGCNSPIRV: while.cond.i28.i:
+// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_A...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/128360
More information about the cfe-commits
mailing list