[clang] [llvm] [HLSL] Add matrix support to atan2 (PR #194984)
via cfe-commits
cfe-commits at lists.llvm.org
Fri May 8 11:16:32 PDT 2026
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/194984
>From babda1fba31e2b04f4b7934e98796ddb9fb4bb02 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Tue, 28 Apr 2026 15:07:52 -0700
Subject: [PATCH 01/20] make it support float
---
clang/lib/Sema/SemaHLSL.cpp | 10 ++++----
clang/test/CodeGenHLSL/builtins/atan2.hlsl | 27 ++++++++++++++++++++++
2 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index aba1c5072a5fc..bb996d291675e 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3232,10 +3232,12 @@ static bool CheckFloatRepresentation(Sema *S, SourceLocation Loc,
static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
int ArgOrdinal,
clang::QualType PassedType) {
- clang::QualType BaseType =
- PassedType->isVectorType()
- ? PassedType->castAs<clang::VectorType>()->getElementType()
- : PassedType;
+ clang::QualType BaseType = PassedType;
+ if (PassedType->isVectorType())
+ BaseType = PassedType->castAs<clang::VectorType>()->getElementType();
+ else if (PassedType->isMatrixType())
+ BaseType = PassedType->castAs<clang::MatrixType>()->getElementType();
+
if (!BaseType->isHalfType() && !BaseType->isFloat32Type())
return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
<< ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
diff --git a/clang/test/CodeGenHLSL/builtins/atan2.hlsl b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
index 512b44a5780db..986ddc75b4f8e 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
@@ -34,6 +34,20 @@ half4 test_atan2_half4 (half4 p0, half4 p1) {
return atan2(p0, p1);
}
+// CHECK-LABEL: test_atan2_half4x4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <16 x half> @llvm.atan2.v16f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <16 x float> @llvm.atan2.v16f32
+half4x4 test_atan2_half4x4 (half4x4 p0, half4x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half2x3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <6 x half> @llvm.atan2.v6f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
+half2x3 test_atan2_half2x3 (half2x3 p0, half2x3 p1) {
+ return atan2(p0, p1);
+}
+
// CHECK-LABEL: test_atan2_float
// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.atan2.f32
float test_atan2_float (float p0, float p1) {
@@ -57,3 +71,16 @@ float3 test_atan2_float3 (float3 p0, float3 p1) {
float4 test_atan2_float4 (float4 p0, float4 p1) {
return atan2(p0, p1);
}
+
+// CHECK-LABEL: test_atan2_float4x4
+// CHECK: call reassoc nnan ninf nsz arcp afn <16 x float> @llvm.atan2.v16f32
+float4x4 test_atan2_float4x4 (float4x4 p0, float4x4 p1) {
+ return atan2(p0, p1);
+}
+
+
+// CHECK-LABEL: test_atan2_float2x3
+// CHECK: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
+float2x3 test_atan2_float2x3 (float2x3 p0, float2x3 p1) {
+ return atan2(p0, p1);
+}
>From 43586d11f49f8a945fe7f4aa690d1e7eaff450f0 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Tue, 28 Apr 2026 18:01:58 -0700
Subject: [PATCH 02/20] adding matrix impl
---
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 6 ++++++
clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl | 10 ++++++++++
2 files changed, 16 insertions(+)
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index cced7b0eabb1f..eb2ebe485b7de 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -309,5 +309,11 @@ constexpr matrix<T, R, C> mul(matrix<T, R, C> x, T y) {
return x * y;
}
+template <typename T, int R, int C>
+constexpr matrix<float, R, C> atan2(matrix<T, R, C> y, matrix<T, R, C> x) {
+ return __builtin_elementwise_atan2((matrix<float, R, C>)y,
+ (matrix<float, R, C>)x);
+}
+
} // namespace hlsl
#endif //_HLSL_HLSL_INTRINSICS_H_
diff --git a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
index 85ff75110a78e..0779ab2d13d7e 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
@@ -181,3 +181,13 @@ float3 test_atan2_uint64_t3 (uint64_t3 p0, uint64_t3 p1) {
float4 test_atan2_uint64_t4 (uint64_t4 p0, uint64_t4 p1) {
return atan2(p0, p1);
}
+
+
+// CHECK: define [[FNATTRS]] <16 x float> @_Z21test_atan2_int64_t4x4u11matrix_typeILj4ELj4ElES_(
+// CHECK: [[CONVI:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_int64_t4x4 (int64_t4x4 p0, int64_t4x4 p1) {
+ return atan2(p0, p1);
+}
>From c4a5db93bdc574b7a1f9f1113413537884106bb8 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Wed, 29 Apr 2026 16:35:41 -0700
Subject: [PATCH 03/20] add tests and overloads
---
.../lib/Headers/hlsl/hlsl_compat_overloads.h | 7 +++++++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 6 ------
clang/lib/Sema/SemaHLSL.cpp | 4 ++--
.../CodeGenHLSL/builtins/atan2-overloads.hlsl | 19 ++++++++++++++++++-
4 files changed, 27 insertions(+), 9 deletions(-)
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
index ee243abef6a41..e916228f1cc11 100644
--- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
+++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -230,6 +230,12 @@ namespace hlsl {
return fn((float4)V1, (float4)V2, (float4)V3); \
}
+#define _DXC_COMPAT_BINARY_MATRIX_OVERLOADS(fn, ty) \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ template <typename T, int R, int C> \
+ constexpr matrix<ty, R, C> fn(matrix<T, R, C> y, matrix<T, R, C> x) { \
+ return fn((matrix<ty, R, C>)y, (matrix<ty, R, C>)x); \
+ }
//===----------------------------------------------------------------------===//
// acos builtins overloads
//===----------------------------------------------------------------------===//
@@ -257,6 +263,7 @@ _DXC_COMPAT_UNARY_INTEGER_OVERLOADS(atan)
_DXC_COMPAT_BINARY_DOUBLE_OVERLOADS(atan2)
_DXC_COMPAT_BINARY_INTEGER_OVERLOADS(atan2)
+_DXC_COMPAT_BINARY_MATRIX_OVERLOADS(atan2, float)
//===----------------------------------------------------------------------===//
// ceil builtins overloads
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index eb2ebe485b7de..cced7b0eabb1f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -309,11 +309,5 @@ constexpr matrix<T, R, C> mul(matrix<T, R, C> x, T y) {
return x * y;
}
-template <typename T, int R, int C>
-constexpr matrix<float, R, C> atan2(matrix<T, R, C> y, matrix<T, R, C> x) {
- return __builtin_elementwise_atan2((matrix<float, R, C>)y,
- (matrix<float, R, C>)x);
-}
-
} // namespace hlsl
#endif //_HLSL_HLSL_INTRINSICS_H_
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index bb996d291675e..368a54b44c2d5 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3240,8 +3240,8 @@ static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
if (!BaseType->isHalfType() && !BaseType->isFloat32Type())
return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
- << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
- << /* half or float */ 2 << PassedType;
+ << ArgOrdinal << /* scalar, vector or matrix of */ 5
+ << /* no int */ 0 << /* half or float */ 2 << PassedType;
return false;
}
diff --git a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
index 0779ab2d13d7e..d36d2aebcf4c8 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
@@ -38,6 +38,15 @@ float4 test_atan2_double4 (double4 p0, double4 p1) {
return atan2(p0, p1);
}
+// CHECK: define [[FNATTRS]] <16 x float> @_Z20test_atan2_double4x4u11matrix_typeILj4ELj4EdES_(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <16 x double> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <16 x double> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_double4x4 (double4x4 p0, double4x4 p1) {
+ return atan2(p0, p1);
+}
+
// CHECK: define [[FNATTRS]] float @_Z14test_atan2_intii(
// CHECK: [[CONVI:%.*]] = sitofp i32 %{{.*}} to float
// CHECK: [[CONV1I:%.*]] = sitofp i32 %{{.*}} to float
@@ -182,7 +191,6 @@ float4 test_atan2_uint64_t4 (uint64_t4 p0, uint64_t4 p1) {
return atan2(p0, p1);
}
-
// CHECK: define [[FNATTRS]] <16 x float> @_Z21test_atan2_int64_t4x4u11matrix_typeILj4ELj4ElES_(
// CHECK: [[CONVI:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
// CHECK: [[CONV1I:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
@@ -191,3 +199,12 @@ float4 test_atan2_uint64_t4 (uint64_t4 p0, uint64_t4 p1) {
float4x4 test_atan2_int64_t4x4 (int64_t4x4 p0, int64_t4x4 p1) {
return atan2(p0, p1);
}
+
+// CHECK: define [[FNATTRS]] <16 x float> @_Z22test_atan2_uint64_t4x4u11matrix_typeILj4ELj4EmES_(
+// CHECK: [[CONVI:%.*]] = uitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_uint64_t4x4 (uint64_t4x4 p0, uint64_t4x4 p1) {
+ return atan2(p0, p1);
+}
>From 7ccd15fcae5d9a517a9a154944503de1ed42f5e9 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Wed, 29 Apr 2026 16:55:02 -0700
Subject: [PATCH 04/20] make macro match other's
---
.../lib/Headers/hlsl/hlsl_compat_overloads.h | 39 ++++++++++++++++---
1 file changed, 34 insertions(+), 5 deletions(-)
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
index e916228f1cc11..0c4963b495b51 100644
--- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
+++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -230,11 +230,39 @@ namespace hlsl {
return fn((float4)V1, (float4)V2, (float4)V3); \
}
-#define _DXC_COMPAT_BINARY_MATRIX_OVERLOADS(fn, ty) \
+#define _DXC_COMPAT_BINARY_DOUBLE_MATRIX_OVERLOADS(fn) \
+ template <uint R, uint C> \
+ constexpr matrix<float, R, C> fn(matrix<double, R, C> y, \
+ matrix<double, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ }
+
+#define _DXC_COMPAT_BINARY_INTEGER_MATRIX_OVERLOADS(fn) \
+ template <uint R, uint C> \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr matrix<float, R, C> fn(matrix<int, R, C> y, matrix<int, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ } \
+ \
+ template <uint R, uint C> \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr matrix<float, R, C> fn(matrix<uint, R, C> y, \
+ matrix<uint, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ } \
+ \
+ template <uint R, uint C> \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr matrix<float, R, C> fn(matrix<int64_t, R, C> y, \
+ matrix<int64_t, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ } \
+ \
+ template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- template <typename T, int R, int C> \
- constexpr matrix<ty, R, C> fn(matrix<T, R, C> y, matrix<T, R, C> x) { \
- return fn((matrix<ty, R, C>)y, (matrix<ty, R, C>)x); \
+ constexpr matrix<float, R, C> fn(matrix<uint64_t, R, C> y, \
+ matrix<uint64_t, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
}
//===----------------------------------------------------------------------===//
// acos builtins overloads
@@ -263,7 +291,8 @@ _DXC_COMPAT_UNARY_INTEGER_OVERLOADS(atan)
_DXC_COMPAT_BINARY_DOUBLE_OVERLOADS(atan2)
_DXC_COMPAT_BINARY_INTEGER_OVERLOADS(atan2)
-_DXC_COMPAT_BINARY_MATRIX_OVERLOADS(atan2, float)
+_DXC_COMPAT_BINARY_DOUBLE_MATRIX_OVERLOADS(atan2)
+_DXC_COMPAT_BINARY_INTEGER_MATRIX_OVERLOADS(atan2)
//===----------------------------------------------------------------------===//
// ceil builtins overloads
>From fb3072ab020b058f6f21646aa3dd6af2f86c1803 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Wed, 29 Apr 2026 17:02:28 -0700
Subject: [PATCH 05/20] add more tests
---
.../CodeGenHLSL/builtins/atan2-overloads.hlsl | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
index d36d2aebcf4c8..6ecad28850b00 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
@@ -191,6 +191,24 @@ float4 test_atan2_uint64_t4 (uint64_t4 p0, uint64_t4 p1) {
return atan2(p0, p1);
}
+// CHECK: define [[FNATTRS]] <16 x float> @_Z19test_atan2_int_t4x4u11matrix_typeILj4ELj4EiES_(
+// CHECK: [[CONVI:%.*]] = sitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_int_t4x4 (int4x4 p0, int4x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <16 x float> @_Z20test_atan2_uint_t4x4u11matrix_typeILj4ELj4EjES_(
+// CHECK: [[CONVI:%.*]] = uitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_uint_t4x4 (uint4x4 p0, uint4x4 p1) {
+ return atan2(p0, p1);
+}
+
// CHECK: define [[FNATTRS]] <16 x float> @_Z21test_atan2_int64_t4x4u11matrix_typeILj4ELj4ElES_(
// CHECK: [[CONVI:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
// CHECK: [[CONV1I:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
>From fe8cbb5342b9c8db8bd0f858ec533f115863057c Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 30 Apr 2026 11:06:45 -0700
Subject: [PATCH 06/20] adding sema tests
---
.../lib/Headers/hlsl/hlsl_compat_overloads.h | 1 +
.../binary-compat-overload-warnings.hlsl | 27 ++++++++++++++++++-
.../BuiltIns/half-float-only-errors2.hlsl | 6 +++++
3 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
index 0c4963b495b51..cddd940d1083e 100644
--- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
+++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -232,6 +232,7 @@ namespace hlsl {
#define _DXC_COMPAT_BINARY_DOUBLE_MATRIX_OVERLOADS(fn) \
template <uint R, uint C> \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
constexpr matrix<float, R, C> fn(matrix<double, R, C> y, \
matrix<double, R, C> x) { \
return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
diff --git a/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl b/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
index 27bb683825de8..7b93ea089d854 100644
--- a/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxilv1.0-unknown-shadermodel6.0-compute -std=hlsl202x -emit-llvm-only -disable-llvm-passes -DFUNC=atan2 %s 2>&1 | FileCheck %s -DFUNC=atan2
+// RUN: %clang_cc1 -finclude-default-header -triple dxilv1.0-unknown-shadermodel6.0-compute -std=hlsl202x -emit-llvm-only -disable-llvm-passes -DFUNC=atan2 %s 2>&1 | FileCheck %s -DFUNC=atan2 --check-prefixes=CHECK,ATAN2
// RUN: %clang_cc1 -finclude-default-header -triple dxilv1.0-unknown-shadermodel6.0-compute -std=hlsl202x -emit-llvm-only -disable-llvm-passes -DFUNC=pow %s 2>&1 | FileCheck %s -DFUNC=pow
// RUN: %clang_cc1 -finclude-default-header -triple dxilv1.0-unknown-shadermodel6.0-compute -std=hlsl202x -emit-llvm-only -disable-llvm-passes -DFUNC=step %s 2>&1 | FileCheck %s -DFUNC=step
@@ -23,6 +23,11 @@ float4 test_binary_double4(double4 p0) {
return FUNC(p0, p0);
}
+float4x4 test_binary_double4x4(double4x4 p0) {
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x 64 bit API lowering for [[FUNC]] is deprecated. Explicitly cast parameters to 32 or 16 bit types.
+ return FUNC(p0, p0);
+}
+
// binary integer overloads
// only test scalar ones for brevity
float test_binary_int(int p0) {
@@ -44,3 +49,23 @@ float test_binary_int(uint64_t p0) {
// CHECK: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
+
+float4x4 test_binary_uint4x4(uint4x4 p0) {
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ return FUNC(p0, p0);
+}
+
+float4x4 test_binary_int4x4(int4x4 p0) {
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ return FUNC(p0, p0);
+}
+
+float4x4 test_binary_int64_t4x4(int64_t4x4 p0) {
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ return FUNC(p0, p0);
+}
+
+float4x4 test_binary_uint64_t4x4(uint64_t4x4 p0) {
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ return FUNC(p0, p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
index 9e10e1afa9385..d7a2f15f6baa8 100644
--- a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
@@ -11,3 +11,9 @@ double2 test_vec_double_builtin(double2 p0, double2 p1) {
return TEST_FUNC(p0, p1);
// expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double2' (aka 'vector<double, 2>'))}}
}
+
+// Temporary matrix workarround until we have proper matrix support in the builtins.
+double2x2 test_vec_double_builtin(double2x2 p0, double2x2 p1) {
+ return __builtin_elementwise_atan2(p0, p1);
+ // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double2x2' (aka 'matrix<double, 2, 2>'))}}
+}
>From 93a09ea2a85848ce27a56591f12a6749f408ad13 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 30 Apr 2026 13:51:01 -0700
Subject: [PATCH 07/20] add matrix overloads
---
clang/include/clang/Basic/HLSLIntrinsics.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/HLSLIntrinsics.td b/clang/include/clang/Basic/HLSLIntrinsics.td
index 144b27cab7398..cc1e30d684d3e 100644
--- a/clang/include/clang/Basic/HLSLIntrinsics.td
+++ b/clang/include/clang/Basic/HLSLIntrinsics.td
@@ -457,7 +457,7 @@ determine the correct quadrant.
\param x The x-coordinate.
}];
let VaryingTypes = [HalfTy, FloatTy];
- let VaryingMatDims = [];
+ let VaryingMatDims = AllMatDims;
}
// Returns the smallest integer value that is greater than or equal to the
>From e0ba50390bf11d79eda7e62cd99d4df18da50dec Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 30 Apr 2026 14:12:15 -0700
Subject: [PATCH 08/20] address comments
---
clang/include/clang/Basic/HLSLIntrinsics.td | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/include/clang/Basic/HLSLIntrinsics.td b/clang/include/clang/Basic/HLSLIntrinsics.td
index cc1e30d684d3e..6084a6f92180e 100644
--- a/clang/include/clang/Basic/HLSLIntrinsics.td
+++ b/clang/include/clang/Basic/HLSLIntrinsics.td
@@ -457,7 +457,6 @@ determine the correct quadrant.
\param x The x-coordinate.
}];
let VaryingTypes = [HalfTy, FloatTy];
- let VaryingMatDims = AllMatDims;
}
// Returns the smallest integer value that is greater than or equal to the
>From 7b10499001800a74940fd3eff0c5f7a8b7da24bc Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 30 Apr 2026 15:13:57 -0700
Subject: [PATCH 09/20] add _mat tests
---
clang/test/CodeGenHLSL/builtins/atan2.hlsl | 27 ---
.../test/CodeGenHLSL/builtins/atan2_mat.hlsl | 215 ++++++++++++++++++
.../test/SemaHLSL/BuiltIns/atan2-errors.hlsl | 7 +
.../BuiltIns/half-float-only-errors2.hlsl | 6 -
4 files changed, 222 insertions(+), 33 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/atan2-errors.hlsl
diff --git a/clang/test/CodeGenHLSL/builtins/atan2.hlsl b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
index 986ddc75b4f8e..512b44a5780db 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
@@ -34,20 +34,6 @@ half4 test_atan2_half4 (half4 p0, half4 p1) {
return atan2(p0, p1);
}
-// CHECK-LABEL: test_atan2_half4x4
-// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <16 x half> @llvm.atan2.v16f16
-// NO_HALF: call reassoc nnan ninf nsz arcp afn <16 x float> @llvm.atan2.v16f32
-half4x4 test_atan2_half4x4 (half4x4 p0, half4x4 p1) {
- return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_half2x3
-// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <6 x half> @llvm.atan2.v6f16
-// NO_HALF: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
-half2x3 test_atan2_half2x3 (half2x3 p0, half2x3 p1) {
- return atan2(p0, p1);
-}
-
// CHECK-LABEL: test_atan2_float
// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.atan2.f32
float test_atan2_float (float p0, float p1) {
@@ -71,16 +57,3 @@ float3 test_atan2_float3 (float3 p0, float3 p1) {
float4 test_atan2_float4 (float4 p0, float4 p1) {
return atan2(p0, p1);
}
-
-// CHECK-LABEL: test_atan2_float4x4
-// CHECK: call reassoc nnan ninf nsz arcp afn <16 x float> @llvm.atan2.v16f32
-float4x4 test_atan2_float4x4 (float4x4 p0, float4x4 p1) {
- return atan2(p0, p1);
-}
-
-
-// CHECK-LABEL: test_atan2_float2x3
-// CHECK: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
-float2x3 test_atan2_float2x3 (float2x3 p0, float2x3 p1) {
- return atan2(p0, p1);
-}
diff --git a/clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl b/clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl
new file mode 100644
index 0000000000000..db9439edfcc11
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl
@@ -0,0 +1,215 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// CHECK-LABEL: test_atan2_half1x1
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <1 x half> @llvm.atan2.v1f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <1 x float> @llvm.atan2.v1f32
+half1x1 test_atan2_half1x1 (half1x1 p0, half1x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half1x2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.atan2.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
+half1x2 test_atan2_half1x2 (half1x2 p0, half1x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half1x3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.atan2.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32
+half1x3 test_atan2_half1x3 (half1x3 p0, half1x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half1x4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.atan2.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
+half1x4 test_atan2_half1x4 (half1x4 p0, half1x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half2x1
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.atan2.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
+half2x1 test_atan2_half2x1 (half2x1 p0, half2x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half2x2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.atan2.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
+half2x2 test_atan2_half2x2 (half2x2 p0, half2x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half2x3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <6 x half> @llvm.atan2.v6f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
+half2x3 test_atan2_half2x3 (half2x3 p0, half2x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half2x4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <8 x half> @llvm.atan2.v8f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <8 x float> @llvm.atan2.v8f32
+half2x4 test_atan2_half2x4 (half2x4 p0, half2x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half3x1
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.atan2.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32
+half3x1 test_atan2_half3x1 (half3x1 p0, half3x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half3x2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <6 x half> @llvm.atan2.v6f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
+half3x2 test_atan2_half3x2 (half3x2 p0, half3x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half3x3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <9 x half> @llvm.atan2.v9f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <9 x float> @llvm.atan2.v9f32
+half3x3 test_atan2_half3x3 (half3x3 p0, half3x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half3x4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <12 x half> @llvm.atan2.v12f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <12 x float> @llvm.atan2.v12f32
+half3x4 test_atan2_half3x4 (half3x4 p0, half3x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half4x1
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.atan2.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
+half4x1 test_atan2_half4x1 (half4x1 p0, half4x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half4x2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <8 x half> @llvm.atan2.v8f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <8 x float> @llvm.atan2.v8f32
+half4x2 test_atan2_half4x2 (half4x2 p0, half4x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half4x3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <12 x half> @llvm.atan2.v12f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <12 x float> @llvm.atan2.v12f32
+half4x3 test_atan2_half4x3 (half4x3 p0, half4x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half4x4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <16 x half> @llvm.atan2.v16f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <16 x float> @llvm.atan2.v16f32
+half4x4 test_atan2_half4x4 (half4x4 p0, half4x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float1x1
+// CHECK: call reassoc nnan ninf nsz arcp afn <1 x float> @llvm.atan2.v1f32
+float1x1 test_atan2_float1x1 (float1x1 p0, float1x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float1x2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
+float1x2 test_atan2_float1x2 (float1x2 p0, float1x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float1x3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32
+float1x3 test_atan2_float1x3 (float1x3 p0, float1x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float1x4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
+float1x4 test_atan2_float1x4 (float1x4 p0, float1x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float2x1
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
+float2x1 test_atan2_float2x1 (float2x1 p0, float2x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float2x2
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
+float2x2 test_atan2_float2x2 (float2x2 p0, float2x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float2x3
+// CHECK: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
+float2x3 test_atan2_float2x3 (float2x3 p0, float2x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float2x4
+// CHECK: call reassoc nnan ninf nsz arcp afn <8 x float> @llvm.atan2.v8f32
+float2x4 test_atan2_float2x4 (float2x4 p0, float2x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float3x1
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32
+float3x1 test_atan2_float3x1 (float3x1 p0, float3x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float3x2
+// CHECK: call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.atan2.v6f32
+float3x2 test_atan2_float3x2 (float3x2 p0, float3x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float3x3
+// CHECK: call reassoc nnan ninf nsz arcp afn <9 x float> @llvm.atan2.v9f32
+float3x3 test_atan2_float3x3 (float3x3 p0, float3x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float3x4
+// CHECK: call reassoc nnan ninf nsz arcp afn <12 x float> @llvm.atan2.v12f32
+float3x4 test_atan2_float3x4 (float3x4 p0, float3x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float4x1
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
+float4x1 test_atan2_float4x1 (float4x1 p0, float4x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float4x2
+// CHECK: call reassoc nnan ninf nsz arcp afn <8 x float> @llvm.atan2.v8f32
+float4x2 test_atan2_float4x2 (float4x2 p0, float4x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float4x3
+// CHECK: call reassoc nnan ninf nsz arcp afn <12 x float> @llvm.atan2.v12f32
+float4x3 test_atan2_float4x3 (float4x3 p0, float4x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float4x4
+// CHECK: call reassoc nnan ninf nsz arcp afn <16 x float> @llvm.atan2.v16f32
+float4x4 test_atan2_float4x4 (float4x4 p0, float4x4 p1) {
+ return atan2(p0, p1);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/atan2-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/atan2-errors.hlsl
new file mode 100644
index 0000000000000..19467d99d2292
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/atan2-errors.hlsl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+
+double2x2 test_vec_double_builtin(double2x2 p0, double2x2 p1) {
+ return __builtin_elementwise_atan2(p0, p1);
+ // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double2x2' (aka 'matrix<double, 2, 2>'))}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
index d7a2f15f6baa8..9e10e1afa9385 100644
--- a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
@@ -11,9 +11,3 @@ double2 test_vec_double_builtin(double2 p0, double2 p1) {
return TEST_FUNC(p0, p1);
// expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double2' (aka 'vector<double, 2>'))}}
}
-
-// Temporary matrix workarround until we have proper matrix support in the builtins.
-double2x2 test_vec_double_builtin(double2x2 p0, double2x2 p1) {
- return __builtin_elementwise_atan2(p0, p1);
- // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double2x2' (aka 'matrix<double, 2, 2>'))}}
-}
>From c328726c2425824319b31bde3e14b1dbe9a8bd37 Mon Sep 17 00:00:00 2001
From: joaosaffran <joaosaffran at gmail.com>
Date: Thu, 30 Apr 2026 15:18:46 -0700
Subject: [PATCH 10/20] Apply suggestion from @Icohedron
Co-authored-by: Deric C. <cheung.deric at gmail.com>
---
clang/lib/Headers/hlsl/hlsl_compat_overloads.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
index cddd940d1083e..12223aee6a4e7 100644
--- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
+++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -265,6 +265,7 @@ namespace hlsl {
matrix<uint64_t, R, C> x) { \
return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
}
+
//===----------------------------------------------------------------------===//
// acos builtins overloads
//===----------------------------------------------------------------------===//
>From 9c0a5bcef00b9400fd85e9eca2cf0afb24c5daaf Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 30 Apr 2026 15:20:44 -0700
Subject: [PATCH 11/20] clean up
---
clang/lib/Sema/SemaHLSL.cpp | 4 ++--
.../BuiltIns/{atan2-errors.hlsl => atan2-errors_mat.hlsl} | 0
2 files changed, 2 insertions(+), 2 deletions(-)
rename clang/test/SemaHLSL/BuiltIns/{atan2-errors.hlsl => atan2-errors_mat.hlsl} (100%)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 368a54b44c2d5..bb996d291675e 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3240,8 +3240,8 @@ static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
if (!BaseType->isHalfType() && !BaseType->isFloat32Type())
return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
- << ArgOrdinal << /* scalar, vector or matrix of */ 5
- << /* no int */ 0 << /* half or float */ 2 << PassedType;
+ << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
+ << /* half or float */ 2 << PassedType;
return false;
}
diff --git a/clang/test/SemaHLSL/BuiltIns/atan2-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/atan2-errors_mat.hlsl
similarity index 100%
rename from clang/test/SemaHLSL/BuiltIns/atan2-errors.hlsl
rename to clang/test/SemaHLSL/BuiltIns/atan2-errors_mat.hlsl
>From a31a8fc0cc64a66631c9be680158b2944c3521b7 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Fri, 1 May 2026 11:35:19 -0700
Subject: [PATCH 12/20] remove templates
---
.../lib/Headers/hlsl/hlsl_compat_overloads.h | 402 ++++++++++-
.../builtins/atan2-overloads_mat.hlsl | 679 ++++++++++++++++++
2 files changed, 1061 insertions(+), 20 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/atan2-overloads_mat.hlsl
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
index 12223aee6a4e7..08af61bed7b9a 100644
--- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
+++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -231,41 +231,403 @@ namespace hlsl {
}
#define _DXC_COMPAT_BINARY_DOUBLE_MATRIX_OVERLOADS(fn) \
- template <uint R, uint C> \
_DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr matrix<float, R, C> fn(matrix<double, R, C> y, \
- matrix<double, R, C> x) { \
- return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ constexpr float1x1 fn(double1x1 y, double1x1 x) { \
+ return fn((float1x1)y, (float1x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float1x2 fn(double1x2 y, double1x2 x) { \
+ return fn((float1x2)y, (float1x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float1x3 fn(double1x3 y, double1x3 x) { \
+ return fn((float1x3)y, (float1x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float1x4 fn(double1x4 y, double1x4 x) { \
+ return fn((float1x4)y, (float1x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float2x1 fn(double2x1 y, double2x1 x) { \
+ return fn((float2x1)y, (float2x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float2x2 fn(double2x2 y, double2x2 x) { \
+ return fn((float2x2)y, (float2x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float2x3 fn(double2x3 y, double2x3 x) { \
+ return fn((float2x3)y, (float2x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float2x4 fn(double2x4 y, double2x4 x) { \
+ return fn((float2x4)y, (float2x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float3x1 fn(double3x1 y, double3x1 x) { \
+ return fn((float3x1)y, (float3x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float3x2 fn(double3x2 y, double3x2 x) { \
+ return fn((float3x2)y, (float3x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float3x3 fn(double3x3 y, double3x3 x) { \
+ return fn((float3x3)y, (float3x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float3x4 fn(double3x4 y, double3x4 x) { \
+ return fn((float3x4)y, (float3x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float4x1 fn(double4x1 y, double4x1 x) { \
+ return fn((float4x1)y, (float4x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float4x2 fn(double4x2 y, double4x2 x) { \
+ return fn((float4x2)y, (float4x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float4x3 fn(double4x3 y, double4x3 x) { \
+ return fn((float4x3)y, (float4x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_64BIT_FN(fn) \
+ constexpr float4x4 fn(double4x4 y, double4x4 x) { \
+ return fn((float4x4)y, (float4x4)x); \
}
#define _DXC_COMPAT_BINARY_INTEGER_MATRIX_OVERLOADS(fn) \
- template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr matrix<float, R, C> fn(matrix<int, R, C> y, matrix<int, R, C> x) { \
- return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ constexpr float1x1 fn(int1x1 y, int1x1 x) { \
+ return fn((float1x1)y, (float1x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x2 fn(int1x2 y, int1x2 x) { \
+ return fn((float1x2)y, (float1x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x3 fn(int1x3 y, int1x3 x) { \
+ return fn((float1x3)y, (float1x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x4 fn(int1x4 y, int1x4 x) { \
+ return fn((float1x4)y, (float1x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x1 fn(int2x1 y, int2x1 x) { \
+ return fn((float2x1)y, (float2x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x2 fn(int2x2 y, int2x2 x) { \
+ return fn((float2x2)y, (float2x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x3 fn(int2x3 y, int2x3 x) { \
+ return fn((float2x3)y, (float2x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x4 fn(int2x4 y, int2x4 x) { \
+ return fn((float2x4)y, (float2x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x1 fn(int3x1 y, int3x1 x) { \
+ return fn((float3x1)y, (float3x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x2 fn(int3x2 y, int3x2 x) { \
+ return fn((float3x2)y, (float3x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x3 fn(int3x3 y, int3x3 x) { \
+ return fn((float3x3)y, (float3x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x4 fn(int3x4 y, int3x4 x) { \
+ return fn((float3x4)y, (float3x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x1 fn(int4x1 y, int4x1 x) { \
+ return fn((float4x1)y, (float4x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x2 fn(int4x2 y, int4x2 x) { \
+ return fn((float4x2)y, (float4x2)x); \
} \
\
- template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr matrix<float, R, C> fn(matrix<uint, R, C> y, \
- matrix<uint, R, C> x) { \
- return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ constexpr float4x3 fn(int4x3 y, int4x3 x) { \
+ return fn((float4x3)y, (float4x3)x); \
} \
\
- template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr matrix<float, R, C> fn(matrix<int64_t, R, C> y, \
- matrix<int64_t, R, C> x) { \
- return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ constexpr float4x4 fn(int4x4 y, int4x4 x) { \
+ return fn((float4x4)y, (float4x4)x); \
+ } \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x1 fn(uint1x1 y, uint1x1 x) { \
+ return fn((float1x1)y, (float1x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x2 fn(uint1x2 y, uint1x2 x) { \
+ return fn((float1x2)y, (float1x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x3 fn(uint1x3 y, uint1x3 x) { \
+ return fn((float1x3)y, (float1x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x4 fn(uint1x4 y, uint1x4 x) { \
+ return fn((float1x4)y, (float1x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x1 fn(uint2x1 y, uint2x1 x) { \
+ return fn((float2x1)y, (float2x1)x); \
} \
\
- template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr matrix<float, R, C> fn(matrix<uint64_t, R, C> y, \
- matrix<uint64_t, R, C> x) { \
- return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
+ constexpr float2x2 fn(uint2x2 y, uint2x2 x) { \
+ return fn((float2x2)y, (float2x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x3 fn(uint2x3 y, uint2x3 x) { \
+ return fn((float2x3)y, (float2x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x4 fn(uint2x4 y, uint2x4 x) { \
+ return fn((float2x4)y, (float2x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x1 fn(uint3x1 y, uint3x1 x) { \
+ return fn((float3x1)y, (float3x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x2 fn(uint3x2 y, uint3x2 x) { \
+ return fn((float3x2)y, (float3x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x3 fn(uint3x3 y, uint3x3 x) { \
+ return fn((float3x3)y, (float3x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x4 fn(uint3x4 y, uint3x4 x) { \
+ return fn((float3x4)y, (float3x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x1 fn(uint4x1 y, uint4x1 x) { \
+ return fn((float4x1)y, (float4x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x2 fn(uint4x2 y, uint4x2 x) { \
+ return fn((float4x2)y, (float4x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x3 fn(uint4x3 y, uint4x3 x) { \
+ return fn((float4x3)y, (float4x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x4 fn(uint4x4 y, uint4x4 x) { \
+ return fn((float4x4)y, (float4x4)x); \
+ } \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x1 fn(int64_t1x1 y, int64_t1x1 x) { \
+ return fn((float1x1)y, (float1x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x2 fn(int64_t1x2 y, int64_t1x2 x) { \
+ return fn((float1x2)y, (float1x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x3 fn(int64_t1x3 y, int64_t1x3 x) { \
+ return fn((float1x3)y, (float1x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x4 fn(int64_t1x4 y, int64_t1x4 x) { \
+ return fn((float1x4)y, (float1x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x1 fn(int64_t2x1 y, int64_t2x1 x) { \
+ return fn((float2x1)y, (float2x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x2 fn(int64_t2x2 y, int64_t2x2 x) { \
+ return fn((float2x2)y, (float2x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x3 fn(int64_t2x3 y, int64_t2x3 x) { \
+ return fn((float2x3)y, (float2x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x4 fn(int64_t2x4 y, int64_t2x4 x) { \
+ return fn((float2x4)y, (float2x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x1 fn(int64_t3x1 y, int64_t3x1 x) { \
+ return fn((float3x1)y, (float3x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x2 fn(int64_t3x2 y, int64_t3x2 x) { \
+ return fn((float3x2)y, (float3x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x3 fn(int64_t3x3 y, int64_t3x3 x) { \
+ return fn((float3x3)y, (float3x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x4 fn(int64_t3x4 y, int64_t3x4 x) { \
+ return fn((float3x4)y, (float3x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x1 fn(int64_t4x1 y, int64_t4x1 x) { \
+ return fn((float4x1)y, (float4x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x2 fn(int64_t4x2 y, int64_t4x2 x) { \
+ return fn((float4x2)y, (float4x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x3 fn(int64_t4x3 y, int64_t4x3 x) { \
+ return fn((float4x3)y, (float4x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x4 fn(int64_t4x4 y, int64_t4x4 x) { \
+ return fn((float4x4)y, (float4x4)x); \
+ } \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x1 fn(uint64_t1x1 y, uint64_t1x1 x) { \
+ return fn((float1x1)y, (float1x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x2 fn(uint64_t1x2 y, uint64_t1x2 x) { \
+ return fn((float1x2)y, (float1x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x3 fn(uint64_t1x3 y, uint64_t1x3 x) { \
+ return fn((float1x3)y, (float1x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float1x4 fn(uint64_t1x4 y, uint64_t1x4 x) { \
+ return fn((float1x4)y, (float1x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x1 fn(uint64_t2x1 y, uint64_t2x1 x) { \
+ return fn((float2x1)y, (float2x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x2 fn(uint64_t2x2 y, uint64_t2x2 x) { \
+ return fn((float2x2)y, (float2x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x3 fn(uint64_t2x3 y, uint64_t2x3 x) { \
+ return fn((float2x3)y, (float2x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float2x4 fn(uint64_t2x4 y, uint64_t2x4 x) { \
+ return fn((float2x4)y, (float2x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x1 fn(uint64_t3x1 y, uint64_t3x1 x) { \
+ return fn((float3x1)y, (float3x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x2 fn(uint64_t3x2 y, uint64_t3x2 x) { \
+ return fn((float3x2)y, (float3x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x3 fn(uint64_t3x3 y, uint64_t3x3 x) { \
+ return fn((float3x3)y, (float3x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float3x4 fn(uint64_t3x4 y, uint64_t3x4 x) { \
+ return fn((float3x4)y, (float3x4)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x1 fn(uint64_t4x1 y, uint64_t4x1 x) { \
+ return fn((float4x1)y, (float4x1)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x2 fn(uint64_t4x2 y, uint64_t4x2 x) { \
+ return fn((float4x2)y, (float4x2)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x3 fn(uint64_t4x3 y, uint64_t4x3 x) { \
+ return fn((float4x3)y, (float4x3)x); \
+ } \
+ \
+ _DXC_DEPRECATED_INT_FN(fn) \
+ constexpr float4x4 fn(uint64_t4x4 y, uint64_t4x4 x) { \
+ return fn((float4x4)y, (float4x4)x); \
}
-
//===----------------------------------------------------------------------===//
// acos builtins overloads
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/atan2-overloads_mat.hlsl b/clang/test/CodeGenHLSL/builtins/atan2-overloads_mat.hlsl
new file mode 100644
index 0000000000000..5cf473fa13d98
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/atan2-overloads_mat.hlsl
@@ -0,0 +1,679 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK -DFNATTRS="hidden spir_func noundef nofpclass(nan inf)"
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_double1x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <2 x double> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <2 x double> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float1x2 test_atan2_double1x2 (double1x2 p0, double1x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_double1x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <3 x double> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <3 x double> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float1x3 test_atan2_double1x3 (double1x3 p0, double1x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_double1x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <4 x double> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <4 x double> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float1x4 test_atan2_double1x4 (double1x4 p0, double1x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_double2x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <2 x double> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <2 x double> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float2x1 test_atan2_double2x1 (double2x1 p0, double2x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_double2x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <4 x double> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <4 x double> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float2x2 test_atan2_double2x2 (double2x2 p0, double2x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_double2x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <6 x double> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <6 x double> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float2x3 test_atan2_double2x3 (double2x3 p0, double2x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_double2x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <8 x double> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <8 x double> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float2x4 test_atan2_double2x4 (double2x4 p0, double2x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_double3x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <3 x double> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <3 x double> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float3x1 test_atan2_double3x1 (double3x1 p0, double3x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_double3x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <6 x double> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <6 x double> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float3x2 test_atan2_double3x2 (double3x2 p0, double3x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <9 x float> @_{{.*}}test_atan2_double3x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <9 x double> %{{.*}} to <9 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <9 x double> %{{.*}} to <9 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <9 x float> @llvm.atan2.v9f32(<9 x float> [[CONVI]], <9 x float> [[CONV1I]])
+// CHECK: ret <9 x float> [[V5]]
+float3x3 test_atan2_double3x3 (double3x3 p0, double3x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_double3x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <12 x double> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <12 x double> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float3x4 test_atan2_double3x4 (double3x4 p0, double3x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_double4x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <4 x double> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <4 x double> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float4x1 test_atan2_double4x1 (double4x1 p0, double4x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_double4x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <8 x double> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <8 x double> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float4x2 test_atan2_double4x2 (double4x2 p0, double4x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_double4x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <12 x double> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <12 x double> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float4x3 test_atan2_double4x3 (double4x3 p0, double4x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <16 x float> @_{{.*}}test_atan2_double4x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <16 x double> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <16 x double> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_double4x4 (double4x4 p0, double4x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_uint1x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float1x2 test_atan2_uint1x2 (uint1x2 p0, uint1x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_uint1x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float1x3 test_atan2_uint1x3 (uint1x3 p0, uint1x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_uint1x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float1x4 test_atan2_uint1x4 (uint1x4 p0, uint1x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_uint2x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float2x1 test_atan2_uint2x1 (uint2x1 p0, uint2x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_uint2x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float2x2 test_atan2_uint2x2 (uint2x2 p0, uint2x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_uint2x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float2x3 test_atan2_uint2x3 (uint2x3 p0, uint2x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_uint2x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float2x4 test_atan2_uint2x4 (uint2x4 p0, uint2x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_uint3x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float3x1 test_atan2_uint3x1 (uint3x1 p0, uint3x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_uint3x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float3x2 test_atan2_uint3x2 (uint3x2 p0, uint3x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <9 x float> @_{{.*}}test_atan2_uint3x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <9 x i32> %{{.*}} to <9 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <9 x i32> %{{.*}} to <9 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <9 x float> @llvm.atan2.v9f32(<9 x float> [[CONVI]], <9 x float> [[CONV1I]])
+// CHECK: ret <9 x float> [[V5]]
+float3x3 test_atan2_uint3x3 (uint3x3 p0, uint3x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_uint3x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float3x4 test_atan2_uint3x4 (uint3x4 p0, uint3x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_uint4x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float4x1 test_atan2_uint4x1 (uint4x1 p0, uint4x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_uint4x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float4x2 test_atan2_uint4x2 (uint4x2 p0, uint4x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_uint4x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float4x3 test_atan2_uint4x3 (uint4x3 p0, uint4x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <16 x float> @_{{.*}}test_atan2_uint4x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_uint4x4 (uint4x4 p0, uint4x4 p1) {
+ return atan2(p0, p1);
+}
+
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_int1x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float1x2 test_atan2_int1x2 (int1x2 p0, int1x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_int1x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float1x3 test_atan2_int1x3 (int1x3 p0, int1x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_int1x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float1x4 test_atan2_int1x4 (int1x4 p0, int1x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_int2x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float2x1 test_atan2_int2x1 (int2x1 p0, int2x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_int2x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float2x2 test_atan2_int2x2 (int2x2 p0, int2x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_int2x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float2x3 test_atan2_int2x3 (int2x3 p0, int2x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_int2x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float2x4 test_atan2_int2x4 (int2x4 p0, int2x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_int3x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float3x1 test_atan2_int3x1 (int3x1 p0, int3x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_int3x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <6 x i32> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float3x2 test_atan2_int3x2 (int3x2 p0, int3x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <9 x float> @_{{.*}}test_atan2_int3x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <9 x i32> %{{.*}} to <9 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <9 x i32> %{{.*}} to <9 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <9 x float> @llvm.atan2.v9f32(<9 x float> [[CONVI]], <9 x float> [[CONV1I]])
+// CHECK: ret <9 x float> [[V5]]
+float3x3 test_atan2_int3x3 (int3x3 p0, int3x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_int3x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float3x4 test_atan2_int3x4 (int3x4 p0, int3x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_int4x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float4x1 test_atan2_int4x1 (int4x1 p0, int4x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_int4x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <8 x i32> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float4x2 test_atan2_int4x2 (int4x2 p0, int4x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_int4x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <12 x i32> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float4x3 test_atan2_int4x3 (int4x3 p0, int4x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <16 x float> @_{{.*}}test_atan2_int4x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <16 x i32> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_int4x4 (int4x4 p0, int4x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_int64_t1x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float1x2 test_atan2_int64_t1x2 (int64_t1x2 p0, int64_t1x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_int64_t1x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float1x3 test_atan2_int64_t1x3 (int64_t1x3 p0, int64_t1x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_int64_t1x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float1x4 test_atan2_int64_t1x4 (int64_t1x4 p0, int64_t1x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_int64_t2x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float2x1 test_atan2_int64_t2x1 (int64_t2x1 p0, int64_t2x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_int64_t2x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float2x2 test_atan2_int64_t2x2 (int64_t2x2 p0, int64_t2x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_int64_t2x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float2x3 test_atan2_int64_t2x3 (int64_t2x3 p0, int64_t2x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_int64_t2x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float2x4 test_atan2_int64_t2x4 (int64_t2x4 p0, int64_t2x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_int64_t3x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float3x1 test_atan2_int64_t3x1 (int64_t3x1 p0, int64_t3x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_int64_t3x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float3x2 test_atan2_int64_t3x2 (int64_t3x2 p0, int64_t3x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <9 x float> @_{{.*}}test_atan2_int64_t3x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <9 x i64> %{{.*}} to <9 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <9 x i64> %{{.*}} to <9 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <9 x float> @llvm.atan2.v9f32(<9 x float> [[CONVI]], <9 x float> [[CONV1I]])
+// CHECK: ret <9 x float> [[V5]]
+float3x3 test_atan2_int64_t3x3 (int64_t3x3 p0, int64_t3x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_int64_t3x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float3x4 test_atan2_int64_t3x4 (int64_t3x4 p0, int64_t3x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_int64_t4x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float4x1 test_atan2_int64_t4x1 (int64_t4x1 p0, int64_t4x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_int64_t4x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float4x2 test_atan2_int64_t4x2 (int64_t4x2 p0, int64_t4x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_int64_t4x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float4x3 test_atan2_int64_t4x3 (int64_t4x3 p0, int64_t4x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <16 x float> @_{{.*}}test_atan2_int64_t4x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_int64_t4x4 (int64_t4x4 p0, int64_t4x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_uint64_t1x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float1x2 test_atan2_uint64_t1x2 (uint64_t1x2 p0, uint64_t1x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_uint64_t1x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float1x3 test_atan2_uint64_t1x3 (uint64_t1x3 p0, uint64_t1x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_uint64_t1x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float1x4 test_atan2_uint64_t1x4 (uint64_t1x4 p0, uint64_t1x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <2 x float> @_{{.*}}test_atan2_uint64_t2x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <2 x float> @llvm.atan2.v2f32(<2 x float> [[CONVI]], <2 x float> [[CONV1I]])
+// CHECK: ret <2 x float> [[V5]]
+float2x1 test_atan2_uint64_t2x1 (uint64_t2x1 p0, uint64_t2x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_uint64_t2x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float2x2 test_atan2_uint64_t2x2 (uint64_t2x2 p0, uint64_t2x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_uint64_t2x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float2x3 test_atan2_uint64_t2x3 (uint64_t2x3 p0, uint64_t2x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_uint64_t2x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float2x4 test_atan2_uint64_t2x4 (uint64_t2x4 p0, uint64_t2x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @_{{.*}}test_atan2_uint64_t3x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <3 x float> @llvm.atan2.v3f32(<3 x float> [[CONVI]], <3 x float> [[CONV1I]])
+// CHECK: ret <3 x float> [[V5]]
+float3x1 test_atan2_uint64_t3x1 (uint64_t3x1 p0, uint64_t3x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <6 x float> @_{{.*}}test_atan2_uint64_t3x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <6 x i64> %{{.*}} to <6 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <6 x float> @llvm.atan2.v6f32(<6 x float> [[CONVI]], <6 x float> [[CONV1I]])
+// CHECK: ret <6 x float> [[V5]]
+float3x2 test_atan2_uint64_t3x2 (uint64_t3x2 p0, uint64_t3x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <9 x float> @_{{.*}}test_atan2_uint64_t3x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <9 x i64> %{{.*}} to <9 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <9 x i64> %{{.*}} to <9 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <9 x float> @llvm.atan2.v9f32(<9 x float> [[CONVI]], <9 x float> [[CONV1I]])
+// CHECK: ret <9 x float> [[V5]]
+float3x3 test_atan2_uint64_t3x3 (uint64_t3x3 p0, uint64_t3x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_uint64_t3x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float3x4 test_atan2_uint64_t3x4 (uint64_t3x4 p0, uint64_t3x4 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <4 x float> @_{{.*}}test_atan2_uint64_t4x1{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <4 x float> @llvm.atan2.v4f32(<4 x float> [[CONVI]], <4 x float> [[CONV1I]])
+// CHECK: ret <4 x float> [[V5]]
+float4x1 test_atan2_uint64_t4x1 (uint64_t4x1 p0, uint64_t4x1 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <8 x float> @_{{.*}}test_atan2_uint64_t4x2{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <8 x i64> %{{.*}} to <8 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <8 x float> @llvm.atan2.v8f32(<8 x float> [[CONVI]], <8 x float> [[CONV1I]])
+// CHECK: ret <8 x float> [[V5]]
+float4x2 test_atan2_uint64_t4x2 (uint64_t4x2 p0, uint64_t4x2 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <12 x float> @_{{.*}}test_atan2_uint64_t4x3{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <12 x i64> %{{.*}} to <12 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <12 x float> @llvm.atan2.v12f32(<12 x float> [[CONVI]], <12 x float> [[CONV1I]])
+// CHECK: ret <12 x float> [[V5]]
+float4x3 test_atan2_uint64_t4x3 (uint64_t4x3 p0, uint64_t4x3 p1) {
+ return atan2(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <16 x float> @_{{.*}}test_atan2_uint64_t4x4{{.*}}(
+// CHECK: [[CONVI:%.*]] = uitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[CONV1I:%.*]] = uitofp <16 x i64> %{{.*}} to <16 x float>
+// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
+// CHECK: ret <16 x float> [[V5]]
+float4x4 test_atan2_uint64_t4x4 (uint64_t4x4 p0, uint64_t4x4 p1) {
+ return atan2(p0, p1);
+}
>From 43d7cbc0b19c847b4c3328f8d4d2354114e504b2 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Fri, 1 May 2026 11:42:28 -0700
Subject: [PATCH 13/20] clean up
---
.../CodeGenHLSL/builtins/atan2-overloads.hlsl | 45 -------------------
1 file changed, 45 deletions(-)
diff --git a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
index 6ecad28850b00..85ff75110a78e 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2-overloads.hlsl
@@ -38,15 +38,6 @@ float4 test_atan2_double4 (double4 p0, double4 p1) {
return atan2(p0, p1);
}
-// CHECK: define [[FNATTRS]] <16 x float> @_Z20test_atan2_double4x4u11matrix_typeILj4ELj4EdES_(
-// CHECK: [[CONVI:%.*]] = fptrunc {{.*}} <16 x double> %{{.*}} to <16 x float>
-// CHECK: [[CONV1I:%.*]] = fptrunc {{.*}} <16 x double> %{{.*}} to <16 x float>
-// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
-// CHECK: ret <16 x float> [[V5]]
-float4x4 test_atan2_double4x4 (double4x4 p0, double4x4 p1) {
- return atan2(p0, p1);
-}
-
// CHECK: define [[FNATTRS]] float @_Z14test_atan2_intii(
// CHECK: [[CONVI:%.*]] = sitofp i32 %{{.*}} to float
// CHECK: [[CONV1I:%.*]] = sitofp i32 %{{.*}} to float
@@ -190,39 +181,3 @@ float3 test_atan2_uint64_t3 (uint64_t3 p0, uint64_t3 p1) {
float4 test_atan2_uint64_t4 (uint64_t4 p0, uint64_t4 p1) {
return atan2(p0, p1);
}
-
-// CHECK: define [[FNATTRS]] <16 x float> @_Z19test_atan2_int_t4x4u11matrix_typeILj4ELj4EiES_(
-// CHECK: [[CONVI:%.*]] = sitofp <16 x i32> %{{.*}} to <16 x float>
-// CHECK: [[CONV1I:%.*]] = sitofp <16 x i32> %{{.*}} to <16 x float>
-// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
-// CHECK: ret <16 x float> [[V5]]
-float4x4 test_atan2_int_t4x4 (int4x4 p0, int4x4 p1) {
- return atan2(p0, p1);
-}
-
-// CHECK: define [[FNATTRS]] <16 x float> @_Z20test_atan2_uint_t4x4u11matrix_typeILj4ELj4EjES_(
-// CHECK: [[CONVI:%.*]] = uitofp <16 x i32> %{{.*}} to <16 x float>
-// CHECK: [[CONV1I:%.*]] = uitofp <16 x i32> %{{.*}} to <16 x float>
-// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
-// CHECK: ret <16 x float> [[V5]]
-float4x4 test_atan2_uint_t4x4 (uint4x4 p0, uint4x4 p1) {
- return atan2(p0, p1);
-}
-
-// CHECK: define [[FNATTRS]] <16 x float> @_Z21test_atan2_int64_t4x4u11matrix_typeILj4ELj4ElES_(
-// CHECK: [[CONVI:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
-// CHECK: [[CONV1I:%.*]] = sitofp <16 x i64> %{{.*}} to <16 x float>
-// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
-// CHECK: ret <16 x float> [[V5]]
-float4x4 test_atan2_int64_t4x4 (int64_t4x4 p0, int64_t4x4 p1) {
- return atan2(p0, p1);
-}
-
-// CHECK: define [[FNATTRS]] <16 x float> @_Z22test_atan2_uint64_t4x4u11matrix_typeILj4ELj4EmES_(
-// CHECK: [[CONVI:%.*]] = uitofp <16 x i64> %{{.*}} to <16 x float>
-// CHECK: [[CONV1I:%.*]] = uitofp <16 x i64> %{{.*}} to <16 x float>
-// CHECK: [[V5:%.*]] = call {{.*}} <16 x float> @llvm.atan2.v16f32(<16 x float> [[CONVI]], <16 x float> [[CONV1I]])
-// CHECK: ret <16 x float> [[V5]]
-float4x4 test_atan2_uint64_t4x4 (uint64_t4x4 p0, uint64_t4x4 p1) {
- return atan2(p0, p1);
-}
>From 810ceebc71eafbf5fea23bfb50638df5f3d8ecad Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Fri, 1 May 2026 17:08:51 -0700
Subject: [PATCH 14/20] fix tests
---
.../test/CodeGenHLSL/builtins/atan2_mat.hlsl | 13 ----
.../binary-compat-overload-warnings.hlsl | 10 +--
llvm/test/CodeGen/DirectX/atan2.ll | 64 +++++++++++++++++++
3 files changed, 69 insertions(+), 18 deletions(-)
diff --git a/clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl b/clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl
index db9439edfcc11..f0d2517a000ee 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2_mat.hlsl
@@ -6,13 +6,6 @@
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-// CHECK-LABEL: test_atan2_half1x1
-// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <1 x half> @llvm.atan2.v1f16
-// NO_HALF: call reassoc nnan ninf nsz arcp afn <1 x float> @llvm.atan2.v1f32
-half1x1 test_atan2_half1x1 (half1x1 p0, half1x1 p1) {
- return atan2(p0, p1);
-}
-
// CHECK-LABEL: test_atan2_half1x2
// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.atan2.v2f16
// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
@@ -118,12 +111,6 @@ half4x4 test_atan2_half4x4 (half4x4 p0, half4x4 p1) {
return atan2(p0, p1);
}
-// CHECK-LABEL: test_atan2_float1x1
-// CHECK: call reassoc nnan ninf nsz arcp afn <1 x float> @llvm.atan2.v1f32
-float1x1 test_atan2_float1x1 (float1x1 p0, float1x1 p1) {
- return atan2(p0, p1);
-}
-
// CHECK-LABEL: test_atan2_float1x2
// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
float1x2 test_atan2_float1x2 (float1x2 p0, float1x2 p1) {
diff --git a/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl b/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
index 7b93ea089d854..fada02f4b7c1a 100644
--- a/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
@@ -24,7 +24,7 @@ float4 test_binary_double4(double4 p0) {
}
float4x4 test_binary_double4x4(double4x4 p0) {
- // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x 64 bit API lowering for [[FUNC]] is deprecated. Explicitly cast parameters to 32 or 16 bit types.
+ // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x 64 bit API lowering for [[FUNC]] is deprecated. Explicitly cast parameters to 32 or 16 bit types.
return FUNC(p0, p0);
}
@@ -51,21 +51,21 @@ float test_binary_int(uint64_t p0) {
}
float4x4 test_binary_uint4x4(uint4x4 p0) {
- // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
float4x4 test_binary_int4x4(int4x4 p0) {
- // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
float4x4 test_binary_int64_t4x4(int64_t4x4 p0) {
- // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
float4x4 test_binary_uint64_t4x4(uint64_t4x4 p0) {
- // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll
index 8f51ab1b7a902..3a1a9d8fc80ac 100644
--- a/llvm/test/CodeGen/DirectX/atan2.ll
+++ b/llvm/test/CodeGen/DirectX/atan2.ll
@@ -82,6 +82,70 @@ entry:
ret <4 x float> %elt.atan2
}
+define noundef <16 x half> @atan2_half4x4(<16 x half> noundef %y, <16 x half> noundef %x) {
+entry:
+; Just Expansion, no scalarization or lowering:
+; EXPCHECK: [[DIV:%.+]] = fdiv <16 x half> %y, %x
+; EXPCHECK: [[ATAN:%.+]] = call <16 x half> @llvm.atan.v16f16(<16 x half> [[DIV]])
+; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x half> [[ATAN]], splat (half 0xH4248)
+; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x half> [[ATAN]], splat (half 0xH4248)
+; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <16 x half> %x, zeroinitializer
+; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <16 x half> %x, zeroinitializer
+; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <16 x half> %y, zeroinitializer
+; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <16 x half> %y, zeroinitializer
+; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <16 x i1> [[XLT0_AND_YGE0]], <16 x half> [[ADD_PI]], <16 x half> [[ATAN]]
+; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <16 x i1> [[XLT0_AND_YLT0]], <16 x half> [[SUB_PI]], <16 x half> [[SELECT_ADD_PI]]
+; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <16 x i1> [[XEQ0_AND_YLT0]], <16 x half> splat (half 0xHBE48), <16 x half> [[SELECT_SUB_PI]]
+; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_HPI:%.+]] = select <16 x i1> [[XEQ0_AND_YGE0]], <16 x half> splat (half 0xH3E48), <16 x half> [[SELECT_NEGHPI]]
+; EXPCHECK: ret <16 x half> [[SELECT_HPI]]
+
+; Scalarization occurs after expansion, so atan scalarization is tested separately.
+; Expansion, scalarization and lowering:
+; Just make sure this expands to exactly 16 scalar DXIL atan (OpCode=17) calls.
+; DOPCHECK-COUNT-16: call half @dx.op.unary.f16(i32 17, half %{{.*}})
+; DOPCHECK-NOT: call half @dx.op.unary.f16(i32 17,
+
+ %elt.atan2 = call <16 x half> @llvm.atan2.v16f16(<16 x half> %y, <16 x half> %x)
+ ret <16 x half> %elt.atan2
+}
+
+define noundef <16 x float> @atan2_float4x4(<16 x float> noundef %y, <16 x float> noundef %x) {
+entry:
+; Just Expansion, no scalarization or lowering:
+; EXPCHECK: [[DIV:%.+]] = fdiv <16 x float> %y, %x
+; EXPCHECK: [[ATAN:%.+]] = call <16 x float> @llvm.atan.v16f32(<16 x float> [[DIV]])
+; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x float> [[ATAN]], splat (float 0x400921FB60000000)
+; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x float> [[ATAN]], splat (float 0x400921FB60000000)
+; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <16 x float> %x, zeroinitializer
+; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <16 x float> %x, zeroinitializer
+; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <16 x float> %y, zeroinitializer
+; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <16 x float> %y, zeroinitializer
+; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <16 x i1> [[XLT0_AND_YGE0]], <16 x float> [[ADD_PI]], <16 x float> [[ATAN]]
+; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <16 x i1> [[XLT0_AND_YLT0]], <16 x float> [[SUB_PI]], <16 x float> [[SELECT_ADD_PI]]
+; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <16 x i1> [[XEQ0_AND_YLT0]], <16 x float> splat (float 0xBFF921FB60000000), <16 x float> [[SELECT_SUB_PI]]
+; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_HPI:%.+]] = select <16 x i1> [[XEQ0_AND_YGE0]], <16 x float> splat (float 0x3FF921FB60000000), <16 x float> [[SELECT_NEGHPI]]
+; EXPCHECK: ret <16 x float> [[SELECT_HPI]]
+
+; Scalarization occurs after expansion, so atan scalarization is tested separately.
+; Expansion, scalarization and lowering:
+; Just make sure this expands to exactly 16 scalar DXIL atan (OpCode=17) calls.
+; DOPCHECK-COUNT-16: call float @dx.op.unary.f32(i32 17, float %{{.*}})
+; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
+
+ %elt.atan2 = call <16 x float> @llvm.atan2.v16f32(<16 x float> %y, <16 x float> %x)
+ ret <16 x float> %elt.atan2
+}
+
declare half @llvm.atan2.f16(half, half)
declare float @llvm.atan2.f32(float, float)
declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
+declare <16 x float> @llvm.atan2.v16f32(<16 x float>, <16 x float>)
+declare <16 x half> @llvm.atan2.v16f16(<16 x half>, <16 x half>)
>From f075a16ebc03ac65209d572dabd6942d60eacd13 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 7 May 2026 14:06:28 -0700
Subject: [PATCH 15/20] fix legalization issue and improve tests
---
llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 19 ++-
.../SPIRV/hlsl-intrinsics/atan2_mat.ll | 147 ++++++++++++++++++
2 files changed, 165 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2_mat.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index 47ffecc4085ab..de91c751e0079 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -135,10 +135,22 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
auto allFloatScalarsAndF16Vector2AndVector4s = {s16, s32, s64, v2s16, v4s16};
+ auto allFloatScalars = {s16, s32, s64};
+
auto allFloatScalarsAndVectors = {
s16, s32, s64, v2s16, v2s32, v2s64, v3s16, v3s32, v3s64,
v4s16, v4s32, v4s64, v8s16, v8s32, v8s64, v16s16, v16s32, v16s64};
+ auto allShaderFloatVectors = {v2s16, v2s32, v2s64, v3s16, v3s32,
+ v3s64, v4s16, v4s32, v4s64};
+
+ auto allFloatVectors = {v2s16, v2s32, v2s64, v3s16, v3s32,
+ v3s64, v4s16, v4s32, v4s64, v8s16,
+ v8s32, v8s64, v16s16, v16s32, v16s64};
+
+ auto &allowedFloatVectorTypes =
+ ST.isShader() ? allShaderFloatVectors : allFloatVectors;
+
auto allFloatAndIntScalarsAndPtrs = {s8, s16, s32, s64, p0, p1,
p2, p3, p4, p5, p6, p7,
p8, p9, p10, p11, p12, p13};
@@ -490,7 +502,12 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
G_FMINIMUM,
G_FMAXIMUM,
G_INTRINSIC_ROUNDEVEN})
- .legalFor(allFloatScalarsAndVectors);
+ .legalFor(allFloatScalars)
+ .legalFor(allowedFloatVectorTypes)
+ .moreElementsToNextPow2(0)
+ .fewerElementsIf(vectorElementCountIsGreaterThan(0, MaxVectorSize),
+ LegalizeMutations::changeElementCountTo(
+ 0, ElementCount::getFixed(MaxVectorSize)));
// clang-format on
getActionDefinitionsBuilder(G_FCOPYSIGN)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2_mat.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2_mat.ll
new file mode 100644
index 0000000000000..f377c32c0ab6a
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2_mat.ll
@@ -0,0 +1,147 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; Vulkan/Shader does not allow the Vector16 capability, so a 4x4 matrix is
+; represented as [4 x <4 x float>] in LLVM IR and the elementwise atan2 is
+; computed per-row as 4 OpExtInst Atan2 calls on <4 x float> (and similarly
+; for half).
+
+; CHECK-NOT: OpCapability Vector16
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#void:]] = OpTypeVoid
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#const_0:]] = OpConstant %[[#int_32]] 0
+; CHECK-DAG: %[[#const_1:]] = OpConstant %[[#int_32]] 1
+; CHECK-DAG: %[[#const_2:]] = OpConstant %[[#int_32]] 2
+; CHECK-DAG: %[[#const_3:]] = OpConstant %[[#int_32]] 3
+; CHECK-DAG: %[[#const_4:]] = OpConstant %[[#int_32]] 4
+; CHECK-DAG: %[[#arr_f32:]] = OpTypeArray %[[#vec4_float_32]] %[[#const_4]]
+; CHECK-DAG: %[[#arr_f16:]] = OpTypeArray %[[#vec4_float_16]] %[[#const_4]]
+; CHECK-DAG: %[[#ptr_arr_f32:]] = OpTypePointer Private %[[#arr_f32]]
+; CHECK-DAG: %[[#ptr_arr_f16:]] = OpTypePointer Private %[[#arr_f16]]
+; CHECK-DAG: %[[#ptr_vec4_f32:]] = OpTypePointer Private %[[#vec4_float_32]]
+; CHECK-DAG: %[[#ptr_vec4_f16:]] = OpTypePointer Private %[[#vec4_float_16]]
+; CHECK-DAG: %[[#fn_f32:]] = OpTypeFunction %[[#void]] %[[#ptr_arr_f32]] %[[#ptr_arr_f32]] %[[#ptr_arr_f32]]
+; CHECK-DAG: %[[#fn_f16:]] = OpTypeFunction %[[#void]] %[[#ptr_arr_f16]] %[[#ptr_arr_f16]] %[[#ptr_arr_f16]]
+
+define internal void @atan2_float4x4(ptr addrspace(10) %out, ptr addrspace(10) %a, ptr addrspace(10) %b) {
+entry:
+ ; CHECK: OpFunction %[[#void]] None %[[#fn_f32]]
+ ; CHECK: %[[#out_f32:]] = OpFunctionParameter %[[#ptr_arr_f32]]
+ ; CHECK: %[[#a_f32:]] = OpFunctionParameter %[[#ptr_arr_f32]]
+ ; CHECK: %[[#b_f32:]] = OpFunctionParameter %[[#ptr_arr_f32]]
+ ; CHECK: %[[#a0_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#a_f32]] %[[#const_0]]
+ ; CHECK: %[[#a1_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#a_f32]] %[[#const_1]]
+ ; CHECK: %[[#a2_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#a_f32]] %[[#const_2]]
+ ; CHECK: %[[#a3_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#a_f32]] %[[#const_3]]
+ ; CHECK: %[[#b0_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#b_f32]] %[[#const_0]]
+ ; CHECK: %[[#b1_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#b_f32]] %[[#const_1]]
+ ; CHECK: %[[#b2_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#b_f32]] %[[#const_2]]
+ ; CHECK: %[[#b3_ptr_f32:]] = OpAccessChain %[[#ptr_vec4_f32]] %[[#b_f32]] %[[#const_3]]
+ ; CHECK: %[[#a0_f32:]] = OpLoad %[[#vec4_float_32]] %[[#a0_ptr_f32]]
+ ; CHECK: %[[#a1_f32:]] = OpLoad %[[#vec4_float_32]] %[[#a1_ptr_f32]]
+ ; CHECK: %[[#a2_f32:]] = OpLoad %[[#vec4_float_32]] %[[#a2_ptr_f32]]
+ ; CHECK: %[[#a3_f32:]] = OpLoad %[[#vec4_float_32]] %[[#a3_ptr_f32]]
+ ; CHECK: %[[#b0_f32:]] = OpLoad %[[#vec4_float_32]] %[[#b0_ptr_f32]]
+ ; CHECK: %[[#b1_f32:]] = OpLoad %[[#vec4_float_32]] %[[#b1_ptr_f32]]
+ ; CHECK: %[[#b2_f32:]] = OpLoad %[[#vec4_float_32]] %[[#b2_ptr_f32]]
+ ; CHECK: %[[#b3_f32:]] = OpLoad %[[#vec4_float_32]] %[[#b3_ptr_f32]]
+ ; CHECK: OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Atan2 %[[#a0_f32]] %[[#b0_f32]]
+ ; CHECK: OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Atan2 %[[#a1_f32]] %[[#b1_f32]]
+ ; CHECK: OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Atan2 %[[#a2_f32]] %[[#b2_f32]]
+ ; CHECK: OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Atan2 %[[#a3_f32]] %[[#b3_f32]]
+ %a0 = getelementptr [4 x <4 x float>], ptr addrspace(10) %a, i32 0, i32 0
+ %a1 = getelementptr [4 x <4 x float>], ptr addrspace(10) %a, i32 0, i32 1
+ %a2 = getelementptr [4 x <4 x float>], ptr addrspace(10) %a, i32 0, i32 2
+ %a3 = getelementptr [4 x <4 x float>], ptr addrspace(10) %a, i32 0, i32 3
+ %b0 = getelementptr [4 x <4 x float>], ptr addrspace(10) %b, i32 0, i32 0
+ %b1 = getelementptr [4 x <4 x float>], ptr addrspace(10) %b, i32 0, i32 1
+ %b2 = getelementptr [4 x <4 x float>], ptr addrspace(10) %b, i32 0, i32 2
+ %b3 = getelementptr [4 x <4 x float>], ptr addrspace(10) %b, i32 0, i32 3
+ %va0 = load <4 x float>, ptr addrspace(10) %a0
+ %va1 = load <4 x float>, ptr addrspace(10) %a1
+ %va2 = load <4 x float>, ptr addrspace(10) %a2
+ %va3 = load <4 x float>, ptr addrspace(10) %a3
+ %vb0 = load <4 x float>, ptr addrspace(10) %b0
+ %vb1 = load <4 x float>, ptr addrspace(10) %b1
+ %vb2 = load <4 x float>, ptr addrspace(10) %b2
+ %vb3 = load <4 x float>, ptr addrspace(10) %b3
+ %r0 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %va0, <4 x float> %vb0)
+ %r1 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %va1, <4 x float> %vb1)
+ %r2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %va2, <4 x float> %vb2)
+ %r3 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %va3, <4 x float> %vb3)
+ %out0 = getelementptr [4 x <4 x float>], ptr addrspace(10) %out, i32 0, i32 0
+ %out1 = getelementptr [4 x <4 x float>], ptr addrspace(10) %out, i32 0, i32 1
+ %out2 = getelementptr [4 x <4 x float>], ptr addrspace(10) %out, i32 0, i32 2
+ %out3 = getelementptr [4 x <4 x float>], ptr addrspace(10) %out, i32 0, i32 3
+ store <4 x float> %r0, ptr addrspace(10) %out0
+ store <4 x float> %r1, ptr addrspace(10) %out1
+ store <4 x float> %r2, ptr addrspace(10) %out2
+ store <4 x float> %r3, ptr addrspace(10) %out3
+ ret void
+}
+
+define internal void @atan2_half4x4(ptr addrspace(10) %out, ptr addrspace(10) %a, ptr addrspace(10) %b) {
+entry:
+ ; CHECK: OpFunction %[[#void]] None %[[#fn_f16]]
+ ; CHECK: %[[#out_f16:]] = OpFunctionParameter %[[#ptr_arr_f16]]
+ ; CHECK: %[[#a_f16:]] = OpFunctionParameter %[[#ptr_arr_f16]]
+ ; CHECK: %[[#b_f16:]] = OpFunctionParameter %[[#ptr_arr_f16]]
+ ; CHECK: %[[#a0_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#a_f16]] %[[#const_0]]
+ ; CHECK: %[[#a1_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#a_f16]] %[[#const_1]]
+ ; CHECK: %[[#a2_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#a_f16]] %[[#const_2]]
+ ; CHECK: %[[#a3_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#a_f16]] %[[#const_3]]
+ ; CHECK: %[[#b0_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#b_f16]] %[[#const_0]]
+ ; CHECK: %[[#b1_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#b_f16]] %[[#const_1]]
+ ; CHECK: %[[#b2_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#b_f16]] %[[#const_2]]
+ ; CHECK: %[[#b3_ptr_f16:]] = OpAccessChain %[[#ptr_vec4_f16]] %[[#b_f16]] %[[#const_3]]
+ ; CHECK: %[[#a0_f16:]] = OpLoad %[[#vec4_float_16]] %[[#a0_ptr_f16]]
+ ; CHECK: %[[#a1_f16:]] = OpLoad %[[#vec4_float_16]] %[[#a1_ptr_f16]]
+ ; CHECK: %[[#a2_f16:]] = OpLoad %[[#vec4_float_16]] %[[#a2_ptr_f16]]
+ ; CHECK: %[[#a3_f16:]] = OpLoad %[[#vec4_float_16]] %[[#a3_ptr_f16]]
+ ; CHECK: %[[#b0_f16:]] = OpLoad %[[#vec4_float_16]] %[[#b0_ptr_f16]]
+ ; CHECK: %[[#b1_f16:]] = OpLoad %[[#vec4_float_16]] %[[#b1_ptr_f16]]
+ ; CHECK: %[[#b2_f16:]] = OpLoad %[[#vec4_float_16]] %[[#b2_ptr_f16]]
+ ; CHECK: %[[#b3_f16:]] = OpLoad %[[#vec4_float_16]] %[[#b3_ptr_f16]]
+ ; CHECK: OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Atan2 %[[#a0_f16]] %[[#b0_f16]]
+ ; CHECK: OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Atan2 %[[#a1_f16]] %[[#b1_f16]]
+ ; CHECK: OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Atan2 %[[#a2_f16]] %[[#b2_f16]]
+ ; CHECK: OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Atan2 %[[#a3_f16]] %[[#b3_f16]]
+ %a0 = getelementptr [4 x <4 x half>], ptr addrspace(10) %a, i32 0, i32 0
+ %a1 = getelementptr [4 x <4 x half>], ptr addrspace(10) %a, i32 0, i32 1
+ %a2 = getelementptr [4 x <4 x half>], ptr addrspace(10) %a, i32 0, i32 2
+ %a3 = getelementptr [4 x <4 x half>], ptr addrspace(10) %a, i32 0, i32 3
+ %b0 = getelementptr [4 x <4 x half>], ptr addrspace(10) %b, i32 0, i32 0
+ %b1 = getelementptr [4 x <4 x half>], ptr addrspace(10) %b, i32 0, i32 1
+ %b2 = getelementptr [4 x <4 x half>], ptr addrspace(10) %b, i32 0, i32 2
+ %b3 = getelementptr [4 x <4 x half>], ptr addrspace(10) %b, i32 0, i32 3
+ %va0 = load <4 x half>, ptr addrspace(10) %a0
+ %va1 = load <4 x half>, ptr addrspace(10) %a1
+ %va2 = load <4 x half>, ptr addrspace(10) %a2
+ %va3 = load <4 x half>, ptr addrspace(10) %a3
+ %vb0 = load <4 x half>, ptr addrspace(10) %b0
+ %vb1 = load <4 x half>, ptr addrspace(10) %b1
+ %vb2 = load <4 x half>, ptr addrspace(10) %b2
+ %vb3 = load <4 x half>, ptr addrspace(10) %b3
+ %r0 = call <4 x half> @llvm.atan2.v4f16(<4 x half> %va0, <4 x half> %vb0)
+ %r1 = call <4 x half> @llvm.atan2.v4f16(<4 x half> %va1, <4 x half> %vb1)
+ %r2 = call <4 x half> @llvm.atan2.v4f16(<4 x half> %va2, <4 x half> %vb2)
+ %r3 = call <4 x half> @llvm.atan2.v4f16(<4 x half> %va3, <4 x half> %vb3)
+ %out0 = getelementptr [4 x <4 x half>], ptr addrspace(10) %out, i32 0, i32 0
+ %out1 = getelementptr [4 x <4 x half>], ptr addrspace(10) %out, i32 0, i32 1
+ %out2 = getelementptr [4 x <4 x half>], ptr addrspace(10) %out, i32 0, i32 2
+ %out3 = getelementptr [4 x <4 x half>], ptr addrspace(10) %out, i32 0, i32 3
+ store <4 x half> %r0, ptr addrspace(10) %out0
+ store <4 x half> %r1, ptr addrspace(10) %out1
+ store <4 x half> %r2, ptr addrspace(10) %out2
+ store <4 x half> %r3, ptr addrspace(10) %out3
+ ret void
+}
+
+declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
+declare <4 x half> @llvm.atan2.v4f16(<4 x half>, <4 x half>)
>From 70e57b6de10866d0368010b756a41460814478af Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 7 May 2026 18:11:22 -0700
Subject: [PATCH 16/20] fixing test
---
llvm/test/CodeGen/DirectX/atan2.ll | 64 ------------------------
llvm/test/CodeGen/DirectX/atan2_mat.ll | 69 ++++++++++++++++++++++++++
2 files changed, 69 insertions(+), 64 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/atan2_mat.ll
diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll
index 3a1a9d8fc80ac..8f51ab1b7a902 100644
--- a/llvm/test/CodeGen/DirectX/atan2.ll
+++ b/llvm/test/CodeGen/DirectX/atan2.ll
@@ -82,70 +82,6 @@ entry:
ret <4 x float> %elt.atan2
}
-define noundef <16 x half> @atan2_half4x4(<16 x half> noundef %y, <16 x half> noundef %x) {
-entry:
-; Just Expansion, no scalarization or lowering:
-; EXPCHECK: [[DIV:%.+]] = fdiv <16 x half> %y, %x
-; EXPCHECK: [[ATAN:%.+]] = call <16 x half> @llvm.atan.v16f16(<16 x half> [[DIV]])
-; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x half> [[ATAN]], splat (half 0xH4248)
-; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x half> [[ATAN]], splat (half 0xH4248)
-; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <16 x half> %x, zeroinitializer
-; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <16 x half> %x, zeroinitializer
-; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <16 x half> %y, zeroinitializer
-; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <16 x half> %y, zeroinitializer
-; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_GE_0]]
-; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <16 x i1> [[XLT0_AND_YGE0]], <16 x half> [[ADD_PI]], <16 x half> [[ATAN]]
-; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_LT_0]]
-; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <16 x i1> [[XLT0_AND_YLT0]], <16 x half> [[SUB_PI]], <16 x half> [[SELECT_ADD_PI]]
-; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_LT_0]]
-; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <16 x i1> [[XEQ0_AND_YLT0]], <16 x half> splat (half 0xHBE48), <16 x half> [[SELECT_SUB_PI]]
-; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_GE_0]]
-; EXPCHECK: [[SELECT_HPI:%.+]] = select <16 x i1> [[XEQ0_AND_YGE0]], <16 x half> splat (half 0xH3E48), <16 x half> [[SELECT_NEGHPI]]
-; EXPCHECK: ret <16 x half> [[SELECT_HPI]]
-
-; Scalarization occurs after expansion, so atan scalarization is tested separately.
-; Expansion, scalarization and lowering:
-; Just make sure this expands to exactly 16 scalar DXIL atan (OpCode=17) calls.
-; DOPCHECK-COUNT-16: call half @dx.op.unary.f16(i32 17, half %{{.*}})
-; DOPCHECK-NOT: call half @dx.op.unary.f16(i32 17,
-
- %elt.atan2 = call <16 x half> @llvm.atan2.v16f16(<16 x half> %y, <16 x half> %x)
- ret <16 x half> %elt.atan2
-}
-
-define noundef <16 x float> @atan2_float4x4(<16 x float> noundef %y, <16 x float> noundef %x) {
-entry:
-; Just Expansion, no scalarization or lowering:
-; EXPCHECK: [[DIV:%.+]] = fdiv <16 x float> %y, %x
-; EXPCHECK: [[ATAN:%.+]] = call <16 x float> @llvm.atan.v16f32(<16 x float> [[DIV]])
-; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x float> [[ATAN]], splat (float 0x400921FB60000000)
-; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x float> [[ATAN]], splat (float 0x400921FB60000000)
-; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <16 x float> %x, zeroinitializer
-; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <16 x float> %x, zeroinitializer
-; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <16 x float> %y, zeroinitializer
-; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <16 x float> %y, zeroinitializer
-; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_GE_0]]
-; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <16 x i1> [[XLT0_AND_YGE0]], <16 x float> [[ADD_PI]], <16 x float> [[ATAN]]
-; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_LT_0]]
-; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <16 x i1> [[XLT0_AND_YLT0]], <16 x float> [[SUB_PI]], <16 x float> [[SELECT_ADD_PI]]
-; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_LT_0]]
-; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <16 x i1> [[XEQ0_AND_YLT0]], <16 x float> splat (float 0xBFF921FB60000000), <16 x float> [[SELECT_SUB_PI]]
-; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_GE_0]]
-; EXPCHECK: [[SELECT_HPI:%.+]] = select <16 x i1> [[XEQ0_AND_YGE0]], <16 x float> splat (float 0x3FF921FB60000000), <16 x float> [[SELECT_NEGHPI]]
-; EXPCHECK: ret <16 x float> [[SELECT_HPI]]
-
-; Scalarization occurs after expansion, so atan scalarization is tested separately.
-; Expansion, scalarization and lowering:
-; Just make sure this expands to exactly 16 scalar DXIL atan (OpCode=17) calls.
-; DOPCHECK-COUNT-16: call float @dx.op.unary.f32(i32 17, float %{{.*}})
-; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
-
- %elt.atan2 = call <16 x float> @llvm.atan2.v16f32(<16 x float> %y, <16 x float> %x)
- ret <16 x float> %elt.atan2
-}
-
declare half @llvm.atan2.f16(half, half)
declare float @llvm.atan2.f32(float, float)
declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
-declare <16 x float> @llvm.atan2.v16f32(<16 x float>, <16 x float>)
-declare <16 x half> @llvm.atan2.v16f16(<16 x half>, <16 x half>)
diff --git a/llvm/test/CodeGen/DirectX/atan2_mat.ll b/llvm/test/CodeGen/DirectX/atan2_mat.ll
new file mode 100644
index 0000000000000..c47fd781b2ede
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/atan2_mat.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefix=EXPCHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefix=DOPCHECK
+
+; Make sure the correct DXIL expansions for atan2 are generated for float and half 4x4 matrices, which are passed here as 16-element vectors.
+
+define noundef <16 x half> @atan2_half4x4(<16 x half> noundef %y, <16 x half> noundef %x) {
+entry:
+; Just Expansion, no scalarization or lowering:
+; EXPCHECK: [[DIV:%.+]] = fdiv <16 x half> %y, %x
+; EXPCHECK: [[ATAN:%.+]] = call <16 x half> @llvm.atan.v16f16(<16 x half> [[DIV]])
+; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x half> [[ATAN]], splat (half 0xH4248)
+; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x half> [[ATAN]], splat (half 0xH4248)
+; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <16 x half> %x, zeroinitializer
+; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <16 x half> %x, zeroinitializer
+; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <16 x half> %y, zeroinitializer
+; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <16 x half> %y, zeroinitializer
+; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <16 x i1> [[XLT0_AND_YGE0]], <16 x half> [[ADD_PI]], <16 x half> [[ATAN]]
+; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <16 x i1> [[XLT0_AND_YLT0]], <16 x half> [[SUB_PI]], <16 x half> [[SELECT_ADD_PI]]
+; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <16 x i1> [[XEQ0_AND_YLT0]], <16 x half> splat (half 0xHBE48), <16 x half> [[SELECT_SUB_PI]]
+; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_HPI:%.+]] = select <16 x i1> [[XEQ0_AND_YGE0]], <16 x half> splat (half 0xH3E48), <16 x half> [[SELECT_NEGHPI]]
+; EXPCHECK: ret <16 x half> [[SELECT_HPI]]
+
+; Scalarization occurs after expansion, so atan scalarization is tested separately.
+; Expansion, scalarization and lowering:
+; Just make sure this expands to exactly 16 scalar DXIL atan (OpCode=17) calls.
+; DOPCHECK-COUNT-16: call half @dx.op.unary.f16(i32 17, half %{{.*}})
+; DOPCHECK-NOT: call half @dx.op.unary.f16(i32 17,
+
+ %elt.atan2 = call <16 x half> @llvm.atan2.v16f16(<16 x half> %y, <16 x half> %x)
+ ret <16 x half> %elt.atan2
+}
+
+define noundef <16 x float> @atan2_float4x4(<16 x float> noundef %y, <16 x float> noundef %x) {
+entry:
+; Just Expansion, no scalarization or lowering:
+; EXPCHECK: [[DIV:%.+]] = fdiv <16 x float> %y, %x
+; EXPCHECK: [[ATAN:%.+]] = call <16 x float> @llvm.atan.v16f32(<16 x float> [[DIV]])
+; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x float> [[ATAN]], splat (float 0x400921FB60000000)
+; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x float> [[ATAN]], splat (float 0x400921FB60000000)
+; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <16 x float> %x, zeroinitializer
+; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <16 x float> %x, zeroinitializer
+; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <16 x float> %y, zeroinitializer
+; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <16 x float> %y, zeroinitializer
+; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <16 x i1> [[XLT0_AND_YGE0]], <16 x float> [[ADD_PI]], <16 x float> [[ATAN]]
+; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <16 x i1> [[X_LT_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <16 x i1> [[XLT0_AND_YLT0]], <16 x float> [[SUB_PI]], <16 x float> [[SELECT_ADD_PI]]
+; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <16 x i1> [[XEQ0_AND_YLT0]], <16 x float> splat (float 0xBFF921FB60000000), <16 x float> [[SELECT_SUB_PI]]
+; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <16 x i1> [[X_EQ_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_HPI:%.+]] = select <16 x i1> [[XEQ0_AND_YGE0]], <16 x float> splat (float 0x3FF921FB60000000), <16 x float> [[SELECT_NEGHPI]]
+; EXPCHECK: ret <16 x float> [[SELECT_HPI]]
+
+; Scalarization occurs after expansion, so atan scalarization is tested separately.
+; Expansion, scalarization and lowering:
+; Just make sure this expands to exactly 16 scalar DXIL atan (OpCode=17) calls.
+; DOPCHECK-COUNT-16: call float @dx.op.unary.f32(i32 17, float %{{.*}})
+; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
+
+ %elt.atan2 = call <16 x float> @llvm.atan2.v16f32(<16 x float> %y, <16 x float> %x)
+ ret <16 x float> %elt.atan2
+}
+
+declare <16 x float> @llvm.atan2.v16f32(<16 x float>, <16 x float>)
+declare <16 x half> @llvm.atan2.v16f16(<16 x half>, <16 x half>)
>From a095c36ced3c269248e1022b7f6673c1fb6f6430 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 7 May 2026 18:48:12 -0700
Subject: [PATCH 17/20] clean up
---
.../lib/Headers/hlsl/hlsl_compat_overloads.h | 402 +-----------------
.../binary-compat-overload-warnings.hlsl | 10 +-
2 files changed, 25 insertions(+), 387 deletions(-)
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
index 08af61bed7b9a..12223aee6a4e7 100644
--- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
+++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -231,403 +231,41 @@ namespace hlsl {
}
#define _DXC_COMPAT_BINARY_DOUBLE_MATRIX_OVERLOADS(fn) \
+ template <uint R, uint C> \
_DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float1x1 fn(double1x1 y, double1x1 x) { \
- return fn((float1x1)y, (float1x1)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float1x2 fn(double1x2 y, double1x2 x) { \
- return fn((float1x2)y, (float1x2)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float1x3 fn(double1x3 y, double1x3 x) { \
- return fn((float1x3)y, (float1x3)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float1x4 fn(double1x4 y, double1x4 x) { \
- return fn((float1x4)y, (float1x4)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float2x1 fn(double2x1 y, double2x1 x) { \
- return fn((float2x1)y, (float2x1)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float2x2 fn(double2x2 y, double2x2 x) { \
- return fn((float2x2)y, (float2x2)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float2x3 fn(double2x3 y, double2x3 x) { \
- return fn((float2x3)y, (float2x3)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float2x4 fn(double2x4 y, double2x4 x) { \
- return fn((float2x4)y, (float2x4)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float3x1 fn(double3x1 y, double3x1 x) { \
- return fn((float3x1)y, (float3x1)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float3x2 fn(double3x2 y, double3x2 x) { \
- return fn((float3x2)y, (float3x2)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float3x3 fn(double3x3 y, double3x3 x) { \
- return fn((float3x3)y, (float3x3)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float3x4 fn(double3x4 y, double3x4 x) { \
- return fn((float3x4)y, (float3x4)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float4x1 fn(double4x1 y, double4x1 x) { \
- return fn((float4x1)y, (float4x1)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float4x2 fn(double4x2 y, double4x2 x) { \
- return fn((float4x2)y, (float4x2)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float4x3 fn(double4x3 y, double4x3 x) { \
- return fn((float4x3)y, (float4x3)x); \
- } \
- \
- _DXC_DEPRECATED_64BIT_FN(fn) \
- constexpr float4x4 fn(double4x4 y, double4x4 x) { \
- return fn((float4x4)y, (float4x4)x); \
+ constexpr matrix<float, R, C> fn(matrix<double, R, C> y, \
+ matrix<double, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
}
#define _DXC_COMPAT_BINARY_INTEGER_MATRIX_OVERLOADS(fn) \
+ template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x1 fn(int1x1 y, int1x1 x) { \
- return fn((float1x1)y, (float1x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x2 fn(int1x2 y, int1x2 x) { \
- return fn((float1x2)y, (float1x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x3 fn(int1x3 y, int1x3 x) { \
- return fn((float1x3)y, (float1x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x4 fn(int1x4 y, int1x4 x) { \
- return fn((float1x4)y, (float1x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x1 fn(int2x1 y, int2x1 x) { \
- return fn((float2x1)y, (float2x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x2 fn(int2x2 y, int2x2 x) { \
- return fn((float2x2)y, (float2x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x3 fn(int2x3 y, int2x3 x) { \
- return fn((float2x3)y, (float2x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x4 fn(int2x4 y, int2x4 x) { \
- return fn((float2x4)y, (float2x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x1 fn(int3x1 y, int3x1 x) { \
- return fn((float3x1)y, (float3x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x2 fn(int3x2 y, int3x2 x) { \
- return fn((float3x2)y, (float3x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x3 fn(int3x3 y, int3x3 x) { \
- return fn((float3x3)y, (float3x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x4 fn(int3x4 y, int3x4 x) { \
- return fn((float3x4)y, (float3x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x1 fn(int4x1 y, int4x1 x) { \
- return fn((float4x1)y, (float4x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x2 fn(int4x2 y, int4x2 x) { \
- return fn((float4x2)y, (float4x2)x); \
+ constexpr matrix<float, R, C> fn(matrix<int, R, C> y, matrix<int, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
} \
\
+ template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x3 fn(int4x3 y, int4x3 x) { \
- return fn((float4x3)y, (float4x3)x); \
+ constexpr matrix<float, R, C> fn(matrix<uint, R, C> y, \
+ matrix<uint, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
} \
\
+ template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x4 fn(int4x4 y, int4x4 x) { \
- return fn((float4x4)y, (float4x4)x); \
- } \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x1 fn(uint1x1 y, uint1x1 x) { \
- return fn((float1x1)y, (float1x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x2 fn(uint1x2 y, uint1x2 x) { \
- return fn((float1x2)y, (float1x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x3 fn(uint1x3 y, uint1x3 x) { \
- return fn((float1x3)y, (float1x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x4 fn(uint1x4 y, uint1x4 x) { \
- return fn((float1x4)y, (float1x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x1 fn(uint2x1 y, uint2x1 x) { \
- return fn((float2x1)y, (float2x1)x); \
+ constexpr matrix<float, R, C> fn(matrix<int64_t, R, C> y, \
+ matrix<int64_t, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
} \
\
+ template <uint R, uint C> \
_DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x2 fn(uint2x2 y, uint2x2 x) { \
- return fn((float2x2)y, (float2x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x3 fn(uint2x3 y, uint2x3 x) { \
- return fn((float2x3)y, (float2x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x4 fn(uint2x4 y, uint2x4 x) { \
- return fn((float2x4)y, (float2x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x1 fn(uint3x1 y, uint3x1 x) { \
- return fn((float3x1)y, (float3x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x2 fn(uint3x2 y, uint3x2 x) { \
- return fn((float3x2)y, (float3x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x3 fn(uint3x3 y, uint3x3 x) { \
- return fn((float3x3)y, (float3x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x4 fn(uint3x4 y, uint3x4 x) { \
- return fn((float3x4)y, (float3x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x1 fn(uint4x1 y, uint4x1 x) { \
- return fn((float4x1)y, (float4x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x2 fn(uint4x2 y, uint4x2 x) { \
- return fn((float4x2)y, (float4x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x3 fn(uint4x3 y, uint4x3 x) { \
- return fn((float4x3)y, (float4x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x4 fn(uint4x4 y, uint4x4 x) { \
- return fn((float4x4)y, (float4x4)x); \
- } \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x1 fn(int64_t1x1 y, int64_t1x1 x) { \
- return fn((float1x1)y, (float1x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x2 fn(int64_t1x2 y, int64_t1x2 x) { \
- return fn((float1x2)y, (float1x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x3 fn(int64_t1x3 y, int64_t1x3 x) { \
- return fn((float1x3)y, (float1x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x4 fn(int64_t1x4 y, int64_t1x4 x) { \
- return fn((float1x4)y, (float1x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x1 fn(int64_t2x1 y, int64_t2x1 x) { \
- return fn((float2x1)y, (float2x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x2 fn(int64_t2x2 y, int64_t2x2 x) { \
- return fn((float2x2)y, (float2x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x3 fn(int64_t2x3 y, int64_t2x3 x) { \
- return fn((float2x3)y, (float2x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x4 fn(int64_t2x4 y, int64_t2x4 x) { \
- return fn((float2x4)y, (float2x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x1 fn(int64_t3x1 y, int64_t3x1 x) { \
- return fn((float3x1)y, (float3x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x2 fn(int64_t3x2 y, int64_t3x2 x) { \
- return fn((float3x2)y, (float3x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x3 fn(int64_t3x3 y, int64_t3x3 x) { \
- return fn((float3x3)y, (float3x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x4 fn(int64_t3x4 y, int64_t3x4 x) { \
- return fn((float3x4)y, (float3x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x1 fn(int64_t4x1 y, int64_t4x1 x) { \
- return fn((float4x1)y, (float4x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x2 fn(int64_t4x2 y, int64_t4x2 x) { \
- return fn((float4x2)y, (float4x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x3 fn(int64_t4x3 y, int64_t4x3 x) { \
- return fn((float4x3)y, (float4x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x4 fn(int64_t4x4 y, int64_t4x4 x) { \
- return fn((float4x4)y, (float4x4)x); \
- } \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x1 fn(uint64_t1x1 y, uint64_t1x1 x) { \
- return fn((float1x1)y, (float1x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x2 fn(uint64_t1x2 y, uint64_t1x2 x) { \
- return fn((float1x2)y, (float1x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x3 fn(uint64_t1x3 y, uint64_t1x3 x) { \
- return fn((float1x3)y, (float1x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float1x4 fn(uint64_t1x4 y, uint64_t1x4 x) { \
- return fn((float1x4)y, (float1x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x1 fn(uint64_t2x1 y, uint64_t2x1 x) { \
- return fn((float2x1)y, (float2x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x2 fn(uint64_t2x2 y, uint64_t2x2 x) { \
- return fn((float2x2)y, (float2x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x3 fn(uint64_t2x3 y, uint64_t2x3 x) { \
- return fn((float2x3)y, (float2x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float2x4 fn(uint64_t2x4 y, uint64_t2x4 x) { \
- return fn((float2x4)y, (float2x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x1 fn(uint64_t3x1 y, uint64_t3x1 x) { \
- return fn((float3x1)y, (float3x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x2 fn(uint64_t3x2 y, uint64_t3x2 x) { \
- return fn((float3x2)y, (float3x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x3 fn(uint64_t3x3 y, uint64_t3x3 x) { \
- return fn((float3x3)y, (float3x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float3x4 fn(uint64_t3x4 y, uint64_t3x4 x) { \
- return fn((float3x4)y, (float3x4)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x1 fn(uint64_t4x1 y, uint64_t4x1 x) { \
- return fn((float4x1)y, (float4x1)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x2 fn(uint64_t4x2 y, uint64_t4x2 x) { \
- return fn((float4x2)y, (float4x2)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x3 fn(uint64_t4x3 y, uint64_t4x3 x) { \
- return fn((float4x3)y, (float4x3)x); \
- } \
- \
- _DXC_DEPRECATED_INT_FN(fn) \
- constexpr float4x4 fn(uint64_t4x4 y, uint64_t4x4 x) { \
- return fn((float4x4)y, (float4x4)x); \
+ constexpr matrix<float, R, C> fn(matrix<uint64_t, R, C> y, \
+ matrix<uint64_t, R, C> x) { \
+ return fn((matrix<float, R, C>)y, (matrix<float, R, C>)x); \
}
+
//===----------------------------------------------------------------------===//
// acos builtins overloads
//===----------------------------------------------------------------------===//
diff --git a/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl b/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
index fada02f4b7c1a..7b93ea089d854 100644
--- a/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/binary-compat-overload-warnings.hlsl
@@ -24,7 +24,7 @@ float4 test_binary_double4(double4 p0) {
}
float4x4 test_binary_double4x4(double4x4 p0) {
- // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x 64 bit API lowering for [[FUNC]] is deprecated. Explicitly cast parameters to 32 or 16 bit types.
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x 64 bit API lowering for [[FUNC]] is deprecated. Explicitly cast parameters to 32 or 16 bit types.
return FUNC(p0, p0);
}
@@ -51,21 +51,21 @@ float test_binary_int(uint64_t p0) {
}
float4x4 test_binary_uint4x4(uint4x4 p0) {
- // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
float4x4 test_binary_int4x4(int4x4 p0) {
- // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
float4x4 test_binary_int64_t4x4(int64_t4x4 p0) {
- // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
float4x4 test_binary_uint64_t4x4(uint64_t4x4 p0) {
- // ATAN2: warning: '[[FUNC]]' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
+ // ATAN2: warning: '[[FUNC]]<4U, 4U>' is deprecated: In 202x int lowering for [[FUNC]] is deprecated. Explicitly cast parameters to float types.
return FUNC(p0, p0);
}
>From 0637d22c8f6602660bf54ef2d5edce8d049ba42e Mon Sep 17 00:00:00 2001
From: joaosaffran <joaosaffran at gmail.com>
Date: Thu, 7 May 2026 18:49:31 -0700
Subject: [PATCH 18/20] Apply suggestion from @farzonl
Co-authored-by: Farzon Lotfi <farzonl at gmail.com>
---
clang/lib/Sema/SemaHLSL.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index bb996d291675e..79ba0de8e9ae1 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3233,10 +3233,13 @@ static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
int ArgOrdinal,
clang::QualType PassedType) {
clang::QualType BaseType = PassedType;
- if (PassedType->isVectorType())
- BaseType = PassedType->castAs<clang::VectorType>()->getElementType();
- else if (PassedType->isMatrixType())
- BaseType = PassedType->castAs<clang::MatrixType>()->getElementType();
+ if (const auto *VT = PassedType->getAs<clang::VectorType>()) {
+ BaseType = VT->getElementType();
+ } else {
+ const auto *MT = PassedType->getAs<clang::MatrixType>()
+ assert(MT && "expected to be Vector or MatrixType");
+ BaseType = MT->getElementType();
+ }
if (!BaseType->isHalfType() && !BaseType->isFloat32Type())
return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
>From fac7e0fc5cde502e144484443a4f8af6f9150184 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Thu, 7 May 2026 19:03:41 -0700
Subject: [PATCH 19/20] fix
---
clang/lib/Sema/SemaHLSL.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 79ba0de8e9ae1..0225645887a52 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3235,10 +3235,8 @@ static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
clang::QualType BaseType = PassedType;
if (const auto *VT = PassedType->getAs<clang::VectorType>()) {
BaseType = VT->getElementType();
- } else {
- const auto *MT = PassedType->getAs<clang::MatrixType>()
- assert(MT && "expected to be Vector or MatrixType");
- BaseType = MT->getElementType();
+ } else if (const auto *MT = PassedType->getAs<clang::MatrixType>()) {
+ BaseType = MT->getElementType();
}
if (!BaseType->isHalfType() && !BaseType->isFloat32Type())
>From 80852d3af1053d5d20f3397f3ba208b77b407125 Mon Sep 17 00:00:00 2001
From: Joao Saffran <joaosaffranllvm at gmail.com>
Date: Fri, 8 May 2026 11:16:05 -0700
Subject: [PATCH 20/20] fix test.. hopefully
---
llvm/test/CodeGen/DirectX/atan2_mat.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/atan2_mat.ll b/llvm/test/CodeGen/DirectX/atan2_mat.ll
index c47fd781b2ede..df37a5c5fa706 100644
--- a/llvm/test/CodeGen/DirectX/atan2_mat.ll
+++ b/llvm/test/CodeGen/DirectX/atan2_mat.ll
@@ -8,8 +8,8 @@ entry:
; Just Expansion, no scalarization or lowering:
; EXPCHECK: [[DIV:%.+]] = fdiv <16 x half> %y, %x
; EXPCHECK: [[ATAN:%.+]] = call <16 x half> @llvm.atan.v16f16(<16 x half> [[DIV]])
-; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x half> [[ATAN]], splat (half 0xH4248)
-; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x half> [[ATAN]], splat (half 0xH4248)
+; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <16 x half> [[ATAN]], splat (half
+; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <16 x half> [[ATAN]], splat (half
; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <16 x half> %x, zeroinitializer
; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <16 x half> %x, zeroinitializer
; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <16 x half> %y, zeroinitializer
More information about the cfe-commits mailing list