[clang] [llvm] Adding splitdouble HLSL function (PR #109331)
Tex Riddell via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 22 14:13:26 PDT 2024
================
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-library %s -fnative-half-type -emit-llvm -O0 -o - | FileCheck %s --check-prefix=SPIRV
+
+
+
+// CHECK: define {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]])
+// CHECK: [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[VALD]])
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1
+// SPIRV: define spir_func {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]])
+// SPIRV-NOT: @llvm.dx.splitdouble.i32
+// SPIRV: [[REG:%.*]] = load double, ptr [[VALD]].addr, align 8
+// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[REG]] to <2 x i32>
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1
+uint test_scalar(double D) {
+ uint A, B;
+ asuint(D, A, B);
+ return A + B;
+}
+
+// CHECK: define {{.*}} i32 {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]])
+// CHECK: [[TRUNC:%.*]] = extractelement <1 x double> %D, i64 0
+// CHECK-NEXT: [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[TRUNC]])
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1
+// SPIRV: define spir_func {{.*}} i32 {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]])
+// SPIRV-NOT: @llvm.dx.splitdouble.i32
+// SPIRV: [[REG:%.*]] = load <1 x double>, ptr [[VALD]].addr, align 8
+// SPIRV-NEXT: [[TRUNC:%.*]] = extractelement <1 x double> %1, i64 0
+// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[TRUNC]] to <2 x i32>
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1
+uint test_double1(double1 D) {
+ uint A, B;
+ asuint(D, A, B);
+ return A + B;
+}
+
+// CHECK: define {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x double> {{.*}} [[VALD:%.*]])
+// CHECK: [[VALRET:%.*]] = {{.*}} call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> [[VALD]])
+// CHECK-NEXT: extractvalue { <2 x i32>, <2 x i32> } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { <2 x i32>, <2 x i32> } [[VALRET]], 1
+// SPIRV: define spir_func {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x double> {{.*}} [[VALD:%.*]])
+// SPIRV-NOT: @llvm.dx.splitdouble.i32
+// SPIRV: [[REG:%.*]] = load <2 x double>, ptr [[VALD]].addr, align 16
+// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <2 x double> [[REG]] to <4 x i32>
+// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+uint2 test_vector2(double2 D) {
+ uint2 A, B;
+ asuint(D, A, B);
+ return A + B;
+}
+
+// CHECK: define {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]])
+// CHECK: [[VALRET:%.*]] = {{.*}} call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> [[VALD]])
+// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 1
+// SPIRV: define spir_func {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]])
+// SPIRV-NOT: @llvm.dx.splitdouble.i32
+// SPIRV: [[REG:%.*]] = load <3 x double>, ptr [[VALD]].addr, align 32
+// SPIRV-NEXT: [[VALRET1:%.*]] = shufflevector <3 x double> [[REG]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
+// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <2 x double> [[VALRET1]] to <4 x i32>
+// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+// SPIRV-NEXT: [[VALRET2:%.*]] = shufflevector <3 x double> [[REG]], <3 x double> poison, <1 x i32> <i32 2>
+// SPIRV-NEXT: [[CAST2:%.*]] = bitcast <1 x double> [[VALRET2]] to <2 x i32>
+// SPIRV-NEXT: [[SHUF3:%.*]] = shufflevector <2 x i32> [[CAST2]], <2 x i32> poison, <1 x i32> zeroinitializer
+// SPIRV-NEXT: [[SHUF4:%.*]] = shufflevector <2 x i32> [[CAST2]], <2 x i32> poison, <1 x i32> <i32 1>
+// SPIRV-NEXT: [[SHUF5:%.*]] = shufflevector <1 x i32> [[SHUF3]], <1 x i32> poison, <2 x i32> zeroinitializer
+// SPIRV-NEXT: [[SHUF6:%.*]] = shufflevector <1 x i32> [[SHUF4]], <1 x i32> poison, <2 x i32> zeroinitializer
+// SPIRV-NEXT: shufflevector <2 x i32> %4, <2 x i32> [[SHUF5]], <3 x i32> <i32 0, i32 1, i32 2>
+// SPIRV-NEXT: shufflevector <2 x i32> %5, <2 x i32> [[SHUF6]], <3 x i32> <i32 0, i32 1, i32 2>
----------------
tex3d wrote:
This is different than the sequence I expected.
For one thing, a vector of size 1 will generate invalid SPIR-V, which will fail SPIR-V validation. You have to use extractelement for the last element, then a bitcast from the scalar double to a two-element i32 vector (`<2 x i32>`), then shuffles.
For another, I thought you were going to combine two pairs, or one pair plus one scalar, in one shuffle at the end. This approach shuffles each pair of doubles separately, which results in more problematic intermediate size-1 vectors.
Here's an example of what I was expecting in the IR, along with the SPIR-V checks in the context of the SPIR-V lowering test:
```ll
define spir_func noundef <3 x i32> @test_vector3(<3 x double> noundef %D) local_unnamed_addr {
entry:
; CHECK-LABEL: ; -- Begin function test_vector3
; CHECK: %[[#param:]] = OpFunctionParameter %[[#vec_3_double]]
; CHECK: %[[#shuf1:]] = OpVectorShuffle %[[#vec_2_double]] %[[#param]] %[[#]] 0 1
%0 = shufflevector <3 x double> %D, <3 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK: %[[#extract2:]] = OpCompositeExtract %[[#double]] %[[#param]] 2
%1 = extractelement <3 x double> %D, i32 2
; CHECK: %[[#cast1:]] = OpBitcast %[[#vec_4_int_32]] %[[#shuf1]]
%2 = bitcast <2 x double> %0 to <4 x i32>
; CHECK: %[[#cast2:]] = OpBitcast %[[#vec_2_int_32]] %[[#extract2]]
%3 = bitcast double %1 to <2 x i32>
; CHECK: %[[#shuf3:]] = OpVectorShuffle %[[#vec_4_int_32]] %[[#cast2]] %[[#]] 0 1 2 3
%4 = shufflevector <2 x i32> %3, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK: %[[#]] = OpVectorShuffle %[[#vec_3_int_32]] %[[#cast1]] %[[#shuf3]] 0 2 4
%high = shufflevector <4 x i32> %2, <4 x i32> %4, <3 x i32> <i32 0, i32 2, i32 4>
; CHECK: %[[#]] = OpVectorShuffle %[[#vec_3_int_32]] %[[#cast1]] %[[#shuf3]] 1 3 5
%low = shufflevector <4 x i32> %2, <4 x i32> %4, <3 x i32> <i32 1, i32 3, i32 5>
%add = add <3 x i32> %high, %low
ret <3 x i32> %add
}
```
https://github.com/llvm/llvm-project/pull/109331
More information about the cfe-commits
mailing list